---
title: "Supplemental material"
date: April 12, 2021
output:
  pdf_document:
    toc: true
    number_sections: true
    toc_depth: 2
---

\newpage

Supplemental material to the paper *The security implications of COVID-19 in times of recurrent respiratory viruses: environmental, social, and medical risk factors*

```{r setup, include=FALSE, cache = F}
knitr::opts_chunk$set(echo = FALSE, cache = TRUE, include=FALSE, message=F, warning=F)

library(sf)
library(ggplot2)
library(dplyr)
library(tidyr)
library(zoo)
library(viridis)
library(gridExtra)
library(spdep)
library(parallel)
library(broom)
library(dotwhisker)
library(knitr)
library(readxl)
library(humidity)
library(raster)
library(viridis)
library(stargazer)
library(tibble)
library(ggrepel)
library(gridExtra)
library(cowplot)
library(igraph)
library(RColorBrewer)
library(igraph)
library(readr)
```

```{r load, cache = F}
load("../grid/hex_ita_10km.sf.RData")
load("../air_quality/airquality.df.RData")
load("../air_quality/era5_climate_hex.df.RData")
load("../dead/hex_dead.df.RData")
load("../covid-cases-prov/reg_testing.df.RData")
load("../locked-down-comune/hex_pop_lockdown.RData")
load("../locked-down-comune/lockdown.m.RData")
load("../pop_density/pop_densities.df.RData")
load("../roads/ita_osm_roads.sf.RData")
load("../census_2011/census_polygons.sf.RData")
load("../comm-matrix/hex_commuting_net.g.RData")
load("../roads/primary_road_hex.df.RData")
load("../hotel-comune/hex_beds.df.RData")
load("../icu/hex_icu_beds.df.RData")
load("../income/hex_income.df.RData")

italy_comune.sf <- 
  read_sf("../shapefile/Italy/Limiti01012020/Com01012020/Com01012020_clipped_to_hex.shp")
ita_reg.sf <- 
  read_sf("../shapefile/Italy/Limiti01012020/Reg01012020/Reg01012020_WGS84.shp")
region.sf <- 
  read_sf("../shapefile/Italy/Limiti01012020/Reg01012020/Reg01012020_WGS84.shp")
province.sf <- 
  read_sf("../shapefile/Italy/Limiti01012020/ProvCM01012020/ProvCM01012020_WGS84.shp")

raw_dead_dat.df <- 
  read_csv("../dead/istat-5-mar-2021/comuni_giornaliero_31dicembre.csv.zip", 
           locale = locale(encoding = "ISO-8859-1"))
```

```{r}
predictor_labels <- 
  c(dist_sd_lag4_contact2death = "past excess mortality",
    neigh_weighted_mean = "neighbouring excess mortality",

    pm2p5_conc = "PM2.5 conc.",
    pm10_conc = "PM10 conc.",
    no_conc = "nitrogen monoxide conc.",
    o3_conc = "ozone conc.",
    co_conc = "carbon monoxide conc.",
    no2_conc = "nitrogen dioxide conc.",
    so2_conc = "sulphur dioxide",
    
    era5_temp = "temperature", 
    era5_rhum = "relative humidity", 
    era5_prec = "precipitation",
    
    perc_breast_cancer = "perc. breast cancer incidence (reg.)",
    perc_colorectal_cancer = "perc. colorectal cancer incidence (reg.)",
    perc_lung_cancer = "perc. lung cancer incidence (reg.)",
    CHRONIC_DIAB = "perc. diabetes (reg.)",
    CHRONIC_HYP  = "perc. hypertension (reg.)", 
    CHRONIC_BRONC = "perc. bronchitis (reg.)", 
    CHRONIC_ALLER = "perc. allergies (reg.)",
    CHRONIC_HEART = "perc. heart cond. (reg.)",
    smokers = "perc. smokers (reg.)",
    
    median_density_km2 = "median density",
    road_length = "tot. length primary roads",
    
    perc_employed = "perc. employed",
    perc_degree_over_workforce = "perc. degree (over workforce)",
    perc_female = "perc. females", 
    perc_over_65 = "perc. over 65",
    gini_over74 = "over 74 conc.",
    perc_work_outside_comune = "perc. working out of comune",
    perc_families_over_4 = "perc. families over 4",
    
    hotel_beds_log = "log. hotel beds",
    
    sum_agedcare_log10 = "log. agedcare employees",
    sum_healthcare_log10 = "log. healthcare employees",
    sum_manufacturing.ex_meat_log10 = "log. manufacturing employees (exc. meat)",
    sum_manufacturing.meat_log10 = "log. manufacturing employees (meat only)",
    sum_services_log10 = "log. services employees",
    sum_warehousing_log10 = "log. warehousing employees",
    sum_transport_log10 = "log. transportation employees",
    sum_delivery_log10  = "log. delivery employees",
    sum_construction_log10 = "log. construction employees",
    sum_education_log10 = "log. education employees",
    sum_hospitality_log10 = "log. hospitality employees",
    sum_retail_log10 = "log. retail employees",
    sum_wholesaling_log10 = "log. wholesaling employees",
    sum_other_log10 = "log. other employees",
    
    perc_low_income = "perc. low income",
    case_test_ratio = "cases vs tests ratio (reg.)",
    perc_neigh5_icu_beds = "perc. ICU beds 50km radius",
    `dist_sd_lag4_contact2death:perc_pop_lockdown` = "past excess mort. X perc. lockdown",
    `perc_pop_lockdown:neigh_weighted_mean` = "neighb. excess mort. X perc.  lockdown",
    `dist_sd_lag4_contact2death:neigh_weighted_mean` = "past excess mort. X neighb. excess mort.",
    `dist_sd_lag4_contact2death:perc_pop_lockdown:neigh_weighted_mean` = "past excess mort. X neighb. excess mort. X  perc. lockdown")
```

```{r}
# Region misconding
hex_ita_10km.sf$CODREG[hex_ita_10km.sf$REGIONE == "Abruzzo"] <- '13'
hex_ita_10km.sf$CODREG[hex_ita_10km.sf$REGIONE == "Basilicata"] <- '17'
hex_ita_10km.sf$CODREG[hex_ita_10km.sf$REGIONE == "Emilia-Romagna"] <- '8'
hex_ita_10km.sf$CODREG[hex_ita_10km.sf$REGIONE == "Liguria"] <- '7'
```

# Hexagonal Grid

An hexagonal grid is randomly created to cover the entire Italian territory. Each grid is set to have a size (diameter) of 10-km. Variables associated to each cell are used as the unit of analysis for the regression models. The adoption of a grid of hexagonal cell is justified by the necessity of mapping statistics reported for geographies with significantly different shape and area.


```{r}
ggsave(file = "../fig/hex_grid.pdf", width = 8, height = 10,
       ggplot(hex_ita_10km.sf %>% filter(!is.na(P1))) +
         geom_sf(fill = 'red', size = .5) +
         theme_bw() +
         labs(#title = "Hexagonal grid",
              # caption = sprintf("Hexagonal cell n: %s, cell size: 10 km; Cell area: %s km2\nNote: Cells with zero population based on 2011 census have been removed.",
              #                   nrow(hex_ita_10km.sf %>% filter(!is.na(P1))), 
              #                   round(mean(st_area(hex_ita_10km.sf %>% filter(!is.na(P1)))) / 1000000, 2)),
              x = NULL, y = NULL))
```

The following figure shows the grid of `r nrow(hex_ita_10km.sf %>% filter(!is.na(P1)))` hexagonal cell used in the analysis. Notably, `r nrow(hex_ita_10km.sf %>% filter(is.na(P1)))`  cells have been dropped since based on the 2011 census are not inhabitated. 

![](../fig/hex_grid.pdf)

## Interesection between census sections and hexagonal cells for count variables

```{r eval = T}

my_bbox <- 
  c('xmin' = 8.464479, 'ymin' = 45.475532, 'xmax' = 8.618974, 'ymax' = 45.548670)

my_bbox.m <- 
  matrix(c(my_bbox['xmin'], my_bbox['xmin'], 
           my_bbox['xmax'], my_bbox['xmax'],
           my_bbox['xmin'], my_bbox['ymax'], 
           my_bbox['ymin'], my_bbox['ymin'],
           my_bbox['ymax'], my_bbox['ymax']),
         ncol = 2)

my_bbox.sf <- 
  st_geometry(st_polygon(x = list(my_bbox.m)))

st_crs(my_bbox.sf) <- 4326

my_bbox_buff_1000.sf <- 
  my_bbox.sf %>%
  st_transform(crs = 32632) %>%
  st_buffer(dist = 1000) %>% # 1 kilometers
  st_transform(crs = 4326)

my_bbox_buff_2500.sf <- 
  my_bbox.sf %>%
  st_transform(crs = 32632) %>%
  st_buffer(dist = 2500) %>% # 2.5 kilometers
  st_transform(crs = 4326)

my_bbox_buff_12000.sf <- 
  my_bbox.sf %>%
  st_transform(crs = 32632) %>%
  st_buffer(dist = 12000) %>% # 12 kilometers
  st_transform(crs = 4326)

hex_ita_10km_smpl.sf <- 
  hex_ita_10km.sf %>% 
  dplyr::filter(hex_id %in%
                  st_intersection(hex_ita_10km.sf, 
                                  st_transform(my_bbox_buff_2500.sf, 32632))$hex_id)

census_polygons_smpl.sf <-
  census_polygons.sf %>%
  dplyr::filter(sez2011 %in% st_intersection(census_polygons.sf %>% st_make_valid(), 
                                             st_transform(hex_ita_10km_smpl.sf,
                                                          32632))$sez2011)

census_centroids_smpl.sf <-
  census_polygons_smpl.sf %>% st_centroid()


census_centroids_smpl.sf <- 
  st_intersection(census_centroids_smpl.sf, 
                  hex_ita_10km_smpl.sf)

ggsave(filename = "../fig/census_to_hex_grid.pdf",  width = 12,
       grid.arrange(
         ggplot() +
           geom_sf(data = hex_ita_10km_smpl.sf %>% 
                     st_transform(4326), fill = NA) +
           geom_sf(data = census_polygons_smpl.sf %>% 
                     st_transform(4326), fill = NA, colour = 'gray', size = .2) + 
           theme_bw() +
           theme(axis.text = element_blank(),
                 axis.ticks = element_blank()) +
           coord_sf(xlim = c(st_bbox(my_bbox_buff_2500.sf)['xmin'], 
                             st_bbox(my_bbox_buff_2500.sf)['xmax']), 
                    ylim = c(st_bbox(my_bbox_buff_2500.sf)['ymin'], 
                             st_bbox(my_bbox_buff_2500.sf)['ymax'])) +
           labs(title = "1."),
         ggplot() +
           geom_sf(data = hex_ita_10km_smpl.sf %>%
                     st_transform(4326), fill = NA) +
           geom_sf(data = census_polygons_smpl.sf %>% 
                     st_transform(4326), fill = NA, colour = 'gray', size = .2) + 
           geom_sf(data = census_centroids_smpl.sf %>% 
                     st_transform(4326), size = .2) + 
           theme_bw() +
           theme(axis.text = element_blank(),
                 axis.ticks = element_blank()) +
           coord_sf(xlim = c(st_bbox(my_bbox_buff_2500.sf)['xmin'], 
                             st_bbox(my_bbox_buff_2500.sf)['xmax']), 
                    ylim = c(st_bbox(my_bbox_buff_2500.sf)['ymin'], 
                             st_bbox(my_bbox_buff_2500.sf)['ymax'])) +
           labs(title = "2."),
         
         ggplot() +
           geom_sf(data = hex_ita_10km_smpl.sf %>% 
                     st_transform(4326), fill = NA) +
           geom_sf(data = census_centroids_smpl.sf %>% 
                     st_transform(4326), size = .6, aes(colour = as.factor(hex_id))) +
           theme_bw() +
           theme(axis.text = element_blank(),
                 axis.ticks = element_blank()) +
           coord_sf(xlim = c(st_bbox(my_bbox_buff_2500.sf)['xmin'], 
                             st_bbox(my_bbox_buff_2500.sf)['xmax']), 
                    ylim = c(st_bbox(my_bbox_buff_2500.sf)['ymin'], 
                             st_bbox(my_bbox_buff_2500.sf)['ymax'])) +
           guides(colour = FALSE) +
           labs(title = "3."),
         ncol = 3))
```

To map census count variables measured at the level of census sections with hexagonal cells, we proceeded as following:

1. we computed the centroid, or its geographic centre, of the `r format(nrow(census_polygons.sf), big.mark=",")` census sections;
2. we intersected centroids and hexagonal cells;
3. for each haxagonal cell, we summed the count variables pertaining to the census sections with a centroid contained within the cell.

![](../fig/census_to_hex_grid.pdf)

## Interesection between comune and hexagonal cells for count variables

To map *comune* count variables onto hexagon cells, we proceeded as following: 

1. we clipped unpopulated areas corresponding to the unpopulated hexagon cells identified before from the *comune*;
2. we randomly sampled a number of points within the remaining (populated) area of the *comune* corresponding to the reported count;
3. we counted how many points resulted within each hexagonal cell. 


```{r eval = F}

italy_comune_smpl.sf <-
  italy_comune.sf %>%
  dplyr::filter(PRO_COM_T %in% st_intersection(italy_comune.sf %>% st_make_valid(), 
                                               st_transform(hex_ita_10km_smpl.sf,
                                                            32632))$PRO_COM_T)
italy_comune_pnt_smpl.sf <- 
  st_sample(italy_comune_smpl.sf, sample(10:20, nrow(italy_comune_smpl.sf), replace = T))

italy_comune_pnt_smpl.sf <- 
  st_as_sf(as.data.frame(st_coordinates(italy_comune_pnt_smpl.sf)), 
           coords = c("X", "Y"), crs = 32632)

italy_comune_pnt_PRO_COM_T_smpl.sf <- 
  st_intersection(italy_comune_pnt_smpl.sf, 
                  italy_comune_smpl.sf)

italy_comune_pnt_hex_smpl.sf <- 
  st_intersection(italy_comune_pnt_smpl.sf, 
                  hex_ita_10km.sf)

ggsave(filename = "../fig/comune_to_hex_grid.png", width = 10, height = 4,
       grid.arrange(
         ggplot() +
           geom_sf(data = italy_comune.sf %>% 
                     st_transform(4326), fill = NA, colour = 'black', size = .2) +
           geom_sf(data = hex_ita_10km.sf %>% filter(is.na(P1)), fill = "red", 
                   alpha = .5, colour = 'red') +
           coord_sf(xlim = c(8.51475, 11.31198), 
                    ylim = c(44.69240, 46.58386)) +
           theme_bw() +
           theme(axis.text = element_blank(),
                 axis.ticks = element_blank()) +
           labs(title = "1."),
         ggplot() +
           geom_sf(data = hex_ita_10km_smpl.sf %>% 
                     st_transform(4326), fill = NA) +
           geom_sf(data = italy_comune_smpl.sf %>% 
                     st_transform(4326), fill = NA, colour = 'gray', size = .2) +
           geom_sf(data = st_intersection(italy_comune_pnt_PRO_COM_T_smpl.sf %>%
                                            st_transform(4326),
                                          my_bbox_buff_2500.sf),
                   size = 2, aes(colour = as.factor(PRO_COM_T))) + 
           theme_bw() +
           theme(axis.text = element_blank(),
                 axis.ticks = element_blank()) +
           coord_sf(xlim = c(st_bbox(my_bbox_buff_2500.sf)['xmin'], 
                             st_bbox(my_bbox_buff_2500.sf)['xmax']), 
                    ylim = c(st_bbox(my_bbox_buff_2500.sf)['ymin'], 
                             st_bbox(my_bbox_buff_2500.sf)['ymax'])) +
           guides(colour = FALSE) +
           labs(title = "2."),
         
         ggplot() +
           geom_sf(data = hex_ita_10km_smpl.sf %>% 
                     st_transform(4326), fill = NA) +
           geom_sf(data = italy_comune_smpl.sf %>% 
                     st_transform(4326), fill = NA, colour = 'gray', size = .2) +
           geom_sf(data = st_intersection(italy_comune_pnt_hex_smpl.sf %>%
                                            st_transform(4326),
                                          my_bbox_buff_2500.sf),
                   size = 2, aes(colour = as.factor(hex_id))) + 
           theme_bw() +
           theme(axis.text = element_blank(),
                 axis.ticks = element_blank()) +
           coord_sf(xlim = c(st_bbox(my_bbox_buff_2500.sf)['xmin'], 
                             st_bbox(my_bbox_buff_2500.sf)['xmax']), 
                    ylim = c(st_bbox(my_bbox_buff_2500.sf)['ymin'], 
                             st_bbox(my_bbox_buff_2500.sf)['ymax'])) +
           guides(colour = FALSE) +
           labs(title = "3."),
         ncol = 3))

```

![](../fig/comune_to_hex_grid.png)

## Intersection between areas and hexagon cells for continuous variables

When continuous variables were reported for a grid of cells, as with air quality and atmospheric data, or when count variables were reported at the level of comune for multiple income brackets, as in the case of income data, the variable for each hexagonal cell was computed as the average of the values reported for the intersecting cells (or comune) weighted according the extension of the area of intersection.

```{r}
r <- 
  raster(sprintf('../air_quality/no2_conc.tiff', var), band = 10)
res <- 
  raster::extract(r, st_transform(hex_ita_10km_smpl.sf, 4326), fun = mean, weights  = T)

hex_ita_10km_smpl.sf$mean_value <- res[,1]

r_spdf <- 
  as(r, "SpatialPixelsDataFrame")
r_spdf <- 
  as.data.frame(r_spdf)
colnames(r_spdf) <- 
  c("value", "x", "y")

ggsave(file = "../fig/grid_to_hex.pdf", width = 10, height = 4,
       grid.arrange(
         ggplot() +  
           geom_tile(data=r_spdf, aes(x=x, y=y, fill=value), alpha=0.8) +
           scale_fill_viridis(limits = c(5,50)) +
           geom_sf(data = st_transform(hex_ita_10km_smpl.sf, 4326),
                   fill = NA, colour = 'white') +
           coord_sf(xlim = c(st_bbox(my_bbox_buff_12000.sf)['xmin'], 
                             st_bbox(my_bbox_buff_12000.sf)['xmax']), 
                    ylim = c(st_bbox(my_bbox_buff_12000.sf)['ymin'], 
                             st_bbox(my_bbox_buff_12000.sf)['ymax'])) +
           theme_bw() +
           theme(axis.text = element_blank(),
                 axis.ticks = element_blank()) +
           labs(x=NULL,y=NULL)  +
           guides(fill = FALSE),
         ggplot() +  
           geom_sf(data = st_transform(hex_ita_10km_smpl.sf, 4326), aes(fill = mean_value)) +
           scale_fill_viridis(limits = c(5,50)) +
           coord_sf(xlim = c(st_bbox(my_bbox_buff_12000.sf)['xmin'], 
                             st_bbox(my_bbox_buff_12000.sf)['xmax']), 
                    ylim = c(st_bbox(my_bbox_buff_12000.sf)['ymin'], 
                             st_bbox(my_bbox_buff_12000.sf)['ymax'])) +
           theme_bw() +
           theme(axis.text = element_blank(),
                 axis.ticks = element_blank()) +
           labs(x=NULL,y=NULL) +
           guides(fill = FALSE),
         ncol = 2))
       

```

![](../fig/grid_to_hex.pdf)


## Intersection between variables reported at the regional level and hexagon cells

For variables reported at the regional level, each hexagon cell assumes the value reported for the region where it is located. In the case of a hexagonal cell intersecting more than one region, the cell is assigned to the region with the largest intersection area.

# Variables

## Excess mortality

```{r}

all_comune <- 
  unique(raw_dead_dat.df$COD_PROVCOM[raw_dead_dat.df$T_15 != "n.d."])
complete_comune <- 
  unique(raw_dead_dat.df$COD_PROVCOM[raw_dead_dat.df$T_20 != "n.d."])
```


Excess mortality is based on Istat figures published in March 2021 ([link](https://www.istat.it/it/archivio/240401)). 

Figures for deaths recorded in every calendar day for the period 2015-19 are reported for `r length(all_comune)` *comune*. Figures for deaths recorded for each calendar day for the first nine months of 2020 are reported for `r length(complete_comune)` *comune* (or `r round(length(complete_comune)/length(all_comune)*100,2)`% of the total). In 2019, these `r length(complete_comune)` *comune* reported `r round(sum(raw_dead_dat.df$T_19[raw_dead_dat.df$COD_PROVCOM %in% complete_comune]) / sum(raw_dead_dat.df$T_19)*100,2)`% of the number of deaths reported in Italy. 

```{r results = 'asis', include = F}

raw_dead_dat.df %>%
  dplyr::group_by(NOME_REGIONE) %>%
  dplyr::summarize(`pop. covered %` = sum(T_19[CL_ETA >= 15 & 
                       COD_PROVCOM %in% complete_comune]  / 
                  sum(T_19[CL_ETA >= 15]))) %>%
  dplyr::arrange(desc(`pop. covered %`)) %>%
  kable(format = 'latex')

```


```{r compute-hex_dead.df}

# Leap year

hex_dead.df <- 
  hex_dead.df %>%
  dplyr::group_by(hex_id, year, day) %>%
  dplyr::summarize(Freq = median(Freq)) 

# hex_dead.df <- 
#   hex_dead.df %>%
#   complete(hex_id, nesting(year, day), fill = list(Freq = 0))

hex_dead.df$hex_id <- 
  as.integer(as.character(hex_dead.df$hex_id))

m <-
  expand.grid(hex_ita_10km.sf$hex_id,
              paste0("T_", 15:20),
              unique(hex_dead.df$day),
              stringsAsFactors = FALSE)

names(m) <- c("hex_id","year","day")

hex_dead.df <-
  merge(hex_dead.df, m,
        by = c("hex_id","year","day"), all.y = T)

hex_dead.df$Freq[is.na(hex_dead.df$Freq)] <- 0
nrow(hex_dead.df)

hex_dead.df <-
  hex_dead.df %>%
  dplyr::group_by(hex_id, year) %>%
  dplyr::arrange(day) %>%
  dplyr::mutate(dead_ma7 = 
                  zoo::rollmean(Freq, 7, align = 'center', fill = NA),
                dead_ma7_lag4 = lag(dead_ma7, n = 4))

hex_dead_wide.df <- 
  hex_dead.df %>%
  dplyr::select(-Freq, -dead_ma7_lag4) %>%
  tidyr::pivot_wider(names_from = year, values_from = dead_ma7)

hex_dead_wide.df$sd_15_19 <- 
  apply(hex_dead_wide.df[,3:7], 1, FUN = sd)
hex_dead_wide.df$mean_15_19 <- 
  apply(hex_dead_wide.df[,3:7], 1, FUN = mean)
hex_dead_wide.df$dist_sd <- 
  (hex_dead_wide.df$T_20 - 
     hex_dead_wide.df$mean_15_19) /
  hex_dead_wide.df$sd_15_19 

hex_dead_wide.df$dist_sd[is.nan(hex_dead_wide.df$dist_sd)] <- 0

## NOTE: Here we set interval infection - death
## Data from https://www.epicentro.iss.it/ 

hex_dead_wide.df <-
  hex_dead_wide.df %>%
  dplyr::group_by(hex_id) %>%
  dplyr::arrange(day) %>%
  mutate(dist_sd_diff14 = c(dist_sd[14:length(dist_sd)], rep(NA, 13)),
         dist_sd_diff15 = c(dist_sd[15:length(dist_sd)], rep(NA, 14)),
         dist_sd_diff16 = c(dist_sd[16:length(dist_sd)], rep(NA, 15)),
         dist_sd_diff17 = c(dist_sd[17:length(dist_sd)], rep(NA, 16)))

hex_dead_wide.df <- 
  rbind(hex_dead_wide.df %>%
          dplyr::filter(as.numeric(day) < 413) %>%
          dplyr::mutate(dist_sd_contact2death = dist_sd_diff14),
        hex_dead_wide.df %>%
          dplyr::filter(as.numeric(day) >= 413 & 
                           as.numeric(day) < 528) %>%
          dplyr::mutate(dist_sd_contact2death = dist_sd_diff15),
        hex_dead_wide.df %>%
          dplyr::filter(as.numeric(day) >= 528 & 
                          as.numeric(day) < 709) %>%
          dplyr::mutate(dist_sd_contact2death = dist_sd_diff16),
        hex_dead_wide.df %>%
          dplyr::filter(as.numeric(day) >= 709) %>%
          dplyr::mutate(dist_sd_contact2death = dist_sd_diff17),
  )

save(hex_dead_wide.df, file = "hex_dead_wide.df.RData")

model_vars.df <- 
  hex_dead_wide.df[,c('hex_id', 'day', 'dist_sd_contact2death')]

model_vars.df$hex_id <- as.integer(model_vars.df$hex_id)
model_vars.df$day <- as.Date(paste0(model_vars.df$day,"-2020"), "%m%d-%Y")

hex_dead_wide.df <- 
  hex_dead.df %>%
  dplyr::select(-Freq, -dead_ma7) %>%
  tidyr::pivot_wider(names_from = year, values_from = dead_ma7_lag4)

hex_dead_wide.df$sd_15_19 <- 
  apply(hex_dead_wide.df[,3:7], 1, FUN = sd)
hex_dead_wide.df$mean_15_19 <- 
  apply(hex_dead_wide.df[,3:7], 1, FUN = mean)
hex_dead_wide.df$dist_sd <- 
  (hex_dead_wide.df$T_20 - 
     hex_dead_wide.df$mean_15_19) /
  hex_dead_wide.df$sd_15_19 

hex_dead_wide.df$dist_sd[is.nan(hex_dead_wide.df$dist_sd)] <- 0

hex_dead_wide.df <-
  hex_dead_wide.df %>%
  dplyr::group_by(hex_id) %>%
  dplyr::arrange(day) %>%
  mutate(dist_sd_lag4_diff14 = c(dist_sd[14:length(dist_sd)], rep(NA, 13)),
         dist_sd_lag4_diff15 = c(dist_sd[15:length(dist_sd)], rep(NA, 14)),
         dist_sd_lag4_diff16 = c(dist_sd[16:length(dist_sd)], rep(NA, 15)),
         dist_sd_lag4_diff17 = c(dist_sd[17:length(dist_sd)], rep(NA, 16)))

hex_dead_wide.df <- 
  rbind(hex_dead_wide.df %>%
          dplyr::filter(as.numeric(day) < 413) %>%
          dplyr::mutate(dist_sd_lag4_contact2death = dist_sd_lag4_diff14),
        hex_dead_wide.df %>%
          dplyr::filter(as.numeric(day) >= 413 & 
                           as.numeric(day) < 528) %>%
          dplyr::mutate(dist_sd_lag4_contact2death = dist_sd_lag4_diff15),
        hex_dead_wide.df %>%
          dplyr::filter(as.numeric(day) >= 528 & 
                          as.numeric(day) < 709) %>%
          dplyr::mutate(dist_sd_lag4_contact2death = dist_sd_lag4_diff16),
        hex_dead_wide.df %>%
          dplyr::filter(as.numeric(day) >= 709) %>%
          dplyr::mutate(dist_sd_lag4_contact2death = dist_sd_lag4_diff17)
  )

hex_dead_wide.df$hex_id <- 
  as.integer(hex_dead_wide.df$hex_id)

hex_dead_wide.df$day <- 
  as.Date(paste0(hex_dead_wide.df$day,"-2020"), "%m%d-%Y")

model_vars.df <- 
  merge(model_vars.df, 
        hex_dead_wide.df %>% dplyr::select(hex_id, day, dist_sd_lag4_contact2death), 
        by = c("hex_id", "day"))

model_vars.df <-
  model_vars.df %>% filter(day < as.Date("2020-12-13"))
```

```{r imputation}
model_vars.df$dist_sd_contact2death[
  !is.finite(model_vars.df$dist_sd_contact2death)] <- 0
model_vars.df$dist_sd_lag4_contact2death[
  !is.finite(model_vars.df$dist_sd_lag4_contact2death)] <- 0
```

A video with the excess mortality mapped in Italy for 2020 is available here \url{https://youtu.be/ZuHpZbsbqIY}.

```{r img-sequence, eval = F}

this.sf <- 
    hex_ita_10km.sf

this_global <- 
  model_vars.df %>%
  mutate(pop = hex_ita_10km.sf$P1[match(hex_id, hex_ita_10km.sf$hex_id)]) %>%
  dplyr::filter(!is.na(pop)) %>%
  dplyr::group_by(day) %>%
  dplyr::summarize(excess_mortality = weighted.mean(dist_sd_contact2death, w = pop, na.rm = T)) %>%
  dplyr::filter(day < as.Date("2020-12-13"))

for (day in as.character(unique(model_vars.df$day))) {
  
  print(day)

  this.df <- 
    model_vars.df[model_vars.df$day == day,]
  
  this.sf$value <- 
    this.df$dist_sd_contact2death[match(this.sf$hex_id,
                                 this.df$hex_id)]
  
  this.sf$value_cut <-
    cut(this.sf$value, breaks = c(-Inf, 2, 3, 4, 5, 10, 15, 20, 25, 250))
  
  ggsave(filename = sprintf("../sequence-images/%s-deaths.png", day), width = 7, height = 9,
         grid.arrange(
         ggplot(this.sf) +
           geom_sf(aes(fill = value_cut), colour = NA, drop) +
           scale_fill_brewer(palette = "YlOrRd", drop=FALSE, 
                             direction = 1,
                             guide = guide_legend(reverse = TRUE)) +
           theme_bw() + 
           theme(axis.text = element_blank(),
                 axis.ticks = element_blank()) +
           geom_sf(data = ita_reg.sf, fill = NA) +
           labs(fill = "difference\nfrom 2015-19 mean\n(in SDs)", 
                title = sprintf("Excess mortality - Estimated infection day: %s", format(as.Date(day),"%d %b %Y"))),
   
         ggplot(this_global) + geom_line(aes(x=day, y = excess_mortality)) + 
           geom_vline(xintercept = as.Date(day), colour = 'red', alpha = .7) + theme_bw() +
           theme(panel.border = element_blank()) +
           labs(y = NULL, x = "global weighted mean (in SDs)", caption = "Source: Istat"),
         ncol = 1, heights = c(5, 1)),
         )
    
}

```


We compute excess mortality through the following steps:

1. The daily number of deaths reported for each *comune* between 2015 and 2020 is mapped onto our hexagon cells with the methods detailed above for counted variables. 
2. For each year between 2015 and 2020, we calculated for each hexagon cell a centered 7-day moving average of the number of reported deaths over 65 in each comune for each year from 2015 to 2020. 
3. For each calendar day, the mean and the standard deviation of the moving averages from 2015 to 2019 are calculated. 
4. For each calendar day, the difference between the 2020 moving average and the 2015-2019 mean is divided by the 2015-2019 standard deviation. In doing so, we define the standard deviation as the unit of measurement and normalise differences across cells due to population density. 
5. We shifted the resulting time series backward between 14 and 17 days to the estimated day of the infection resulting in death instead of the actual day of reported death. This takes into consideration changes in the median number of days between symptoms' onset and death in Italy. Based on data published by Italy's Italian National Health Service (Istituto Superiore di Sanità) we set the contact-to-death estimate to 
  * 14 days before 13 April 2020,
  * 15 days before 28 May 2020,
  * 16 days before 9 July 2020,
  * 17 days on and after 9 July 2020.
6. Infinite and non-number values resulting from division by zero are assigned an excess mortality value of zero. 

With this method we are able to estimate the excess mortality for `r length(unique(model_vars.df$hex_id[!is.na(model_vars.df$dist_sd_contact2death)]))` hexagon cells and for each day between `r format(min(model_vars.df$day), "%d %B %Y")` and `r format(max(model_vars.df$day), "%d %B %Y")`.

The distribution of the resulting excess mortality variable is detailed over time and over space in the following figures. 

![](../fig/excess_mortality_ts.pdf)

![](../fig/excess_mortality_map.pdf)

![](../fig/excess_mortality_north_map.pdf)


## Air quality variables

```{r}
names(airquality.df)[2] <- "day"
airquality_spread.df <- 
  airquality.df %>% tidyr::spread(var, value)

model_vars.df <- 
  merge(model_vars.df, airquality_spread.df, by = c("hex_id","day"), all.x = T)
```

Air quality data are obtained from Copernicus Atmosphere Monitoring Service (CAMS, [link](https://ads.atmosphere.copernicus.eu/cdsapp#!/dataset/cams-europe-air-quality-forecasts)) with a 10 kilometres' resolution. In the following figures, CAMS cells are overlayed to our hexagon cells. 

```{r}
r <- 
  raster('../air_quality/pm10_conc.tiff', band = 1)

r_spdf <- as(r, "SpatialPixelsDataFrame")
r_spdf <- as.data.frame(r_spdf)
colnames(r_spdf) <- c("value", "x", "y")

ggsave(file = "../fig/cams_lombardy.pdf", 
       ggplot() +  
         geom_tile(data=r_spdf, aes(x=x, y=y, fill=value), alpha=0.8, colour = 'white') +
         scale_fill_viridis() +
         geom_sf(data = hex_ita_10km.sf %>% st_transform(4326), 
                 fill = NA, colour = "white") +
         geom_sf(data = ita_reg.sf %>% filter(COD_REG == 3), 
                 fill = NA, 
                 colour = 'white', size = 1.5) +
         coord_sf(xlim = c(8.51475, 11.31198), 
                  ylim = c(44.69240, 46.58386)) +
         labs(#title = "CAMS European air quality forecast grid and hexagonal grid in Lombardy",
              #caption = "CAMS cell size: 10 km approx.",
              x = NULL, y = NULL) +
         guides(fill = FALSE))
```

![](../fig/cams_lombardy.pdf)

In the following figures we map instead the evolution of selected air quality measures over Europe and Northern Italy.

![](../fig/airquality_eur_PM25_eur.png)
![](../fig/airquality_northita_PM25.png)

![](../fig/airquality_eur_PM10_eur.png)
![](../fig/airquality_northita_PM10.png)
![](../fig/airquality_eur_Carbon_monoxide_eur.png)
![](../fig/airquality_northita_Carbon_monoxide.png)
![](../fig/airquality_eur_Nitrogen_dioxide_eur.png)
![](../fig/airquality_northita_Nitrogen_dioxide.png)

![](../fig/airquality_eur_Nitrogen_monoxide_eur.png)
![](../fig/airquality_northita_Nitrogen_monoxide.png)

## Atmosphere variables

Atmosphere data are derived from ERA5 ([link](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-pressure-levels)) variables with a 30 kilometres' resolution. In the following figures, ERA5 cells are overlayed to our hexagon cells. 

```{r}
era5_climate_hex.df$day <- era5_climate_hex.df$date

era5_climate_hex_spread.df <-
  era5_climate_hex.df %>%
  dplyr::select(-date) %>%
  tidyr::pivot_wider(names_from = var, values_from = value,
                     names_prefix = "era5_") %>%
  dplyr::mutate(era5_rhum = RH(era5_temp, era5_dewp, isK = TRUE))

model_vars.df <- 
  merge(model_vars.df, 
        era5_climate_hex_spread.df,
        by = c("hex_id", "day"))
```

```{r}
r <- 
  raster('../air_quality/adaptor.mars.internal-1594365022.5107076-24267-22-6f64132e-6be2-435e-8f0c-afc486589c1f.tif', band = 1)

r_spdf <- as(r, "SpatialPixelsDataFrame")
r_spdf <- as.data.frame(r_spdf)
colnames(r_spdf) <- c("value", "x", "y")

ggsave(file = "../fig/era5_lombardy.pdf", 
       ggplot() +  
         geom_tile(data=r_spdf, aes(x=x, y=y, fill=value), alpha=0.8, colour = 'white') +
         scale_fill_viridis() +
         geom_sf(data = hex_ita_10km.sf %>% st_transform(4326), 
                 fill = NA, colour = "white") +
         geom_sf(data = ita_reg.sf %>% filter(COD_REG == 3), 
                 fill = NA, 
                 colour = 'white', size = 1.5) +
         coord_sf(xlim = c(8.51475, 11.31198), 
                  ylim = c(44.69240, 46.58386)) +
         labs(# title = "ERA5 grid and hexagonal grid in Lombardy",
           # caption = "ERA5 cell size: 31 km approx.",
           x = NULL, y = NULL) +
         guides(fill = FALSE))
```

![](../fig/era5_lombardy.pdf)


## Census variables

Demographic variables are computed based on census counts at the level of 2011 census sections. The geographic distribution of the variables used in the model is showed in the following figures.

```{r}
# model_vars.df <- 
hex_ita_10km.df <- hex_ita_10km.sf
st_geometry(hex_ita_10km.df) <- NULL

model_vars.df <- 
  merge(model_vars.df, 
        hex_ita_10km.df, 
        by = "hex_id" ) %>%
  filter(!is.na(P1))

model_vars.df$CODREG <- as.numeric(model_vars.df$CODREG)

model_vars.df$P3[is.na(model_vars.df$P3)] <- 0

model_vars.df <-
  model_vars.df %>%
  mutate(pop_over_65 = P27 + P28 + P29,
         pop_under_65 = P1 - (P27 + P28 + P29),
         perc_over_65 = (P27 + P28 + P29) / P1,
         perc_families_over_4 = (PF5 + PF6 + PF8) / PF1,
         perc_work_outside_comune = P138 / P1,
         perc_employed = P60 / P1, 
         perc_unemployed =  P62 / P60,
         perc_degree_over_workforce = P47 / P60,
         perc_female = P3 / P1)

model_vars.df$perc_families_over_4[
  is.na(model_vars.df$perc_families_over_4)] <- 0

model_vars.df$sum_employees[
  is.na(model_vars.df$sum_employees)] <- 0
model_vars.df$sum_agedcare[
  is.na(model_vars.df$sum_agedcare)] <- 0
model_vars.df$sum_construction[
  is.na(model_vars.df$sum_construction)] <- 0
model_vars.df$sum_delivery[
  is.na(model_vars.df$sum_delivery)] <- 0
model_vars.df$sum_education[
  is.na(model_vars.df$sum_education)] <- 0
model_vars.df$sum_healthcare[
  is.na(model_vars.df$sum_healthcare)] <- 0
model_vars.df$sum_hospitality[
  is.na(model_vars.df$sum_hospitality)] <- 0
model_vars.df$sum_manufacturing.ex_meat[
  is.na(model_vars.df$sum_manufacturing.ex_meat)] <- 0
model_vars.df$sum_manufacturing.meat[
  is.na(model_vars.df$sum_manufacturing.meat)] <- 0
model_vars.df$sum_other[
  is.na(model_vars.df$sum_other)] <- 0
model_vars.df$sum_retail[
  is.na(model_vars.df$sum_retail)] <- 0
model_vars.df$sum_services[
  is.na(model_vars.df$sum_services)] <- 0
model_vars.df$sum_transport[
  is.na(model_vars.df$sum_transport)] <- 0
model_vars.df$sum_warehousing[
  is.na(model_vars.df$sum_warehousing)] <- 0
model_vars.df$sum_wholesaling[
  is.na(model_vars.df$sum_wholesaling)] <- 0
  
model_vars.df$perc_degree_over_workforce[
  !is.finite(model_vars.df$perc_degree_over_workforce)] <- 0

model_vars.df$perc_unemployed[
  !is.finite(model_vars.df$perc_unemployed)] <- 0

myLog10 <- function(x){log10(x+1)}

model_vars.df <- 
  model_vars.df %>%
  mutate_at(.vars = vars(sum_employees:sum_wholesaling),
            .funs = list(log10 = myLog10))
```

```{r}
model_vars.df$median_density_km2 <-
  pop_densities.df$median_density_km2[match(model_vars.df$hex_id,
                                            pop_densities.df$hex_id)]

model_vars.df$gini_over74 <-
  pop_densities.df$gini_over74[match(model_vars.df$hex_id,
                                     pop_densities.df$hex_id)]

model_vars.df$gini_over74[!is.finite(model_vars.df$gini_over74)] <- 0
```

```{r include = TRUE, fig.width = 14, fig.height = 10}

this.sf <- hex_ita_10km.sf
this.df <- model_vars.df[model_vars.df$day == as.Date("2020-01-01"),]

censusPlot <- function(var) {
  
  this.sf$this_var <- 
    this.df[[var]][match(this.sf$hex_id, this.df$hex_id)]

  
  ggplot(this.sf) +
    geom_sf(aes(fill = this_var), colour = NA) +
    scale_fill_viridis() +
    theme_bw() +
    theme(axis.text = element_blank(),
          axis.ticks = element_blank()) +
    labs(x = predictor_labels[var], fill = NULL)
}

plot_list <- 
  lapply(c("perc_female", "perc_over_65", "gini_over74", 
           "perc_work_outside_comune", "perc_families_over_4", 
           "perc_employed", "perc_degree_over_workforce", 
           "sum_agedcare_log10", "sum_construction_log10",
           "sum_delivery_log10", "sum_education_log10", "sum_healthcare_log10",
           "sum_hospitality_log10", "sum_manufacturing.ex_meat_log10",
           "sum_manufacturing.meat_log10", 
           "sum_other_log10", "sum_retail_log10", "sum_services_log10",
           "sum_transport_log10", "sum_warehousing_log10", 
           "sum_wholesaling_log10"), censusPlot)

plot_grid(plotlist = plot_list, ncol = 5, align = 'v')
```

```{r}

ggsave(file = "../fig/excess_mortality_ts.pdf", width = 7, height = 8,
       plot_grid(
         
         model_vars.df %>%
           dplyr::group_by(day) %>%
           dplyr::summarize(perc005 = quantile(dist_sd_contact2death, .005),
                            perc985 = quantile(dist_sd_contact2death, .985),
                            mean = mean(dist_sd_contact2death),
                            medianLOM = median(dist_sd_contact2death[REGIONE == "Lombardia"]), 
                            medianBG = median(dist_sd_contact2death[PROVINCIA == "Bergamo"])) %>%
           ggplot(aes(x = day)) +
           geom_ribbon(aes(ymin = perc005, ymax = perc985, fill = 'red'), 
                       colour = 'red', alpha = .5) +
           geom_line(aes(y=mean, linetype = "1")) +
           geom_line(aes(y=medianLOM, linetype = "2")) +
           geom_line(aes(y=medianBG, linetype = "3")) +
           geom_text_repel(data = data.frame(label = c("6 Mar", "14 Mar", "14 Feb"),
                                             day = as.Date(c("2020-03-06", "2020-03-14", "2020-02-13")),
                                             value = c(18.38478, 6.23809, 0.2390457)),
                           aes(label = label, x=day, y=value),
                           nudge_x      = - 30,
                           nudge_y = + 10,
                           segment.size  = .15,
                           segment.color = "grey40",
                           direction     = "y") +
           theme_bw() +
           scale_fill_identity(name = NULL, guide = 'legend',labels = c('99% distribution')) +
           scale_x_date(limits = c(as.Date("2020-01-01"), as.Date("2020-06-10"))) +
           scale_y_continuous(limits = c(-5, 20)) +
           scale_linetype_manual(name = NULL,
                                 values =c('1'=1,'2'=2,'3'=4), labels = c('Italy (mean)','Lombardia (median)','Province of Bergamo (median)')) +
           labs(x = NULL, y = "excess mortality") +
           theme(legend.position = c(.8, .7)),
         
         model_vars.df %>%
           dplyr::group_by(day) %>%
           dplyr::summarize(perc005 = quantile(dist_sd_contact2death, .005),
                            perc985 = quantile(dist_sd_contact2death, .985),
                            mean = mean(dist_sd_contact2death),
                            medianLOM = median(dist_sd_contact2death[REGIONE == "Lombardia"]), 
                            medianBG = median(dist_sd_contact2death[PROVINCIA == "Bergamo"])) %>%
           ggplot(aes(x = day)) +
           geom_ribbon(aes(ymin = perc005, ymax = perc985, fill = 'red'), 
                       colour = 'red', alpha = .9) +
           geom_line(aes(y=mean, linetype = "1")) +
           geom_line(aes(y=medianLOM, linetype = "2")) +
           theme_bw() +
           # theme(axis.title.y = element_blank(), axis.text.y = element_blank(), 
           #       axis.ticks.y = element_blank()) +
           scale_x_date(limits = c(as.Date("2020-09-01"), as.Date("2020-12-15"))) +
           scale_y_continuous(limits = c(-5, 20)) +
           labs(x = NULL, y = "excess mortality") +
           guides(fill = FALSE, linetype = F),
         ncol = 1)
)


# 
peak_median_hex_id <- 
  model_vars.df %>%
  dplyr::filter(day >= as.Date("2020-03-13") - 10 &
                day <= as.Date("2020-03-13") + 10) %>%
  dplyr::group_by(hex_id) %>%
  dplyr::summarise(excess_mortality = median(dist_sd_contact2death))

peak_median_hex_id_2ndwave <- 
  model_vars.df %>%
  dplyr::filter(day >= as.Date("2020-11-11") - 10 &
                day <= as.Date("2020-11-11") + 10) %>%
  dplyr::group_by(hex_id) %>%
  dplyr::summarise(excess_mortality = median(dist_sd_contact2death))

early_median_hex_id <- 
  model_vars.df %>%
  dplyr::filter(day >= as.Date("2020-02-11") &
                day < as.Date("2020-02-23")) %>%
  dplyr::group_by(hex_id) %>%
  dplyr::summarise(excess_mortality = median(dist_sd_contact2death))

early2_median_hex_id <- 
  model_vars.df %>%
  dplyr::filter(day >= as.Date("2020-02-23") &
                day <= as.Date("2020-02-24")) %>%
  dplyr::group_by(hex_id) %>%
  dplyr::summarise(excess_mortality = median(dist_sd_contact2death))

hex_ita_10km_excess_mortality.sf <- hex_ita_10km.sf

hex_ita_10km_excess_mortality.sf$peak_median <- 
  peak_median_hex_id$excess_mortality[match(hex_ita_10km_excess_mortality.sf$hex_id, 
                                                    peak_median_hex_id$hex_id)]

hex_ita_10km_excess_mortality.sf$peak_median_2nd_wave <- 
  peak_median_hex_id_2ndwave$excess_mortality[match(hex_ita_10km_excess_mortality.sf$hex_id, 
                                                    peak_median_hex_id_2ndwave$hex_id)]

hex_ita_10km_excess_mortality.sf$early_median <- 
  early_median_hex_id$excess_mortality[match(hex_ita_10km_excess_mortality.sf$hex_id, 
                                             early_median_hex_id$hex_id)]

hex_ita_10km_excess_mortality.sf$early2_median <- 
  early2_median_hex_id$excess_mortality[match(hex_ita_10km_excess_mortality.sf$hex_id, 
                                             early2_median_hex_id$hex_id)]

model_vars_11feb.df <- 
  model_vars.df[model_vars.df$day == "2020-02-11",]

model_vars_20feb.df <- 
  model_vars.df[model_vars.df$day == "2020-02-20",]

hex_ita_10km_excess_mortality.sf$dist_sd_contact2death_20feb <- 
  model_vars_20feb.df$dist_sd_contact2death[match(hex_ita_10km_excess_mortality.sf$hex_id,
                                          model_vars_20feb.df$hex_id)]

hex_ita_10km_excess_mortality.sf$dist_sd_contact2death_11feb <- 
  model_vars_11feb.df$dist_sd_contact2death[match(hex_ita_10km_excess_mortality.sf$hex_id,
                                          model_vars_11feb.df$hex_id)]


lockdown_23feb.sf <- 
  rbind(st_sf(a = 1, 
              geom = italy_comune.sf %>% 
                dplyr::filter(PRO_COM_T %in% 
                                c("098010","098014","098026", "098019",
                                  "098035","098054","098002","098057",
                                  "098062","098047")) %>%
                st_union()),
        st_sf(a=1,
              geom = italy_comune.sf %>% 
                dplyr::filter(PRO_COM_T %in% "028105") %>%
                st_union())
  )

early_hotspots.sf <- 
  rbind(st_sf(a = 1,
        geom = italy_comune.sf %>% 
                dplyr::filter(PRO_COM_T %in% 
                                c("016144","016008")) %>%
                st_union()),
        st_sf(a = 1,
        geom = italy_comune.sf %>% 
                dplyr::filter(PRO_COM_T %in% 
                                '041044') %>%
                st_union()))
        
lockdown_23feb_centroid.df <- 
  as.data.frame(st_coordinates(st_centroid(lockdown_23feb.sf %>%
                                             st_transform(4326))))
lockdown_23feb_centroid.df$label <- 
  c("Codogno",
    "Vo'")

early_hotspots_centroid.df <- 
  as.data.frame(st_coordinates(st_centroid(early_hotspots.sf %>%
                                             st_transform(4326))))
early_hotspots_centroid.df$label <- 
  c("Nembro and Alzano Lombardo",
    "Pesaro")


ggsave(file = "../fig/excess_mortality_north_map.pdf", width = 10, height = 12,
       grid.arrange(
         ggplot(hex_ita_10km_excess_mortality.sf %>% 
                  st_transform(4326) %>%
                  dplyr::filter(!is.na(early_median))) +
           geom_sf(aes(fill = cut(early_median, c(-Inf,4,5,6,7,8,Inf))), colour = NA) +
           scale_fill_brewer(palette = "Reds", 
                             direction = 1, na.value = "white",
                             drop = FALSE) + 
           geom_sf(data = lockdown_23feb.sf, fill = NA, size = .35, 
                   colour = "#31a354") +
           geom_sf(data = ita_reg.sf,
                   fill = NA,
                   colour = 'grey', size = .2, alpha = .5) +
           geom_sf(data = ita_osm_roads.sf, 
                   size = .15, alpha = .25) +
           coord_sf(ylim = c(43.6, 47.09416), xlim = c(6.612583,14)) +
           geom_label_repel(data = lockdown_23feb_centroid.df[1,],
                            aes(x=X,y=Y,label=label),
                            nudge_x = - 10,
                            nudge_y = 10, 
                            segment.size = .1) +
           geom_label_repel(data = lockdown_23feb_centroid.df[2,],
                            aes(x=X,y=Y,label=label),
                            nudge_x = + 10,
                            nudge_y = 10, 
                            segment.size = .1) +
           geom_label_repel(data = early_hotspots_centroid.df[1,],
                            aes(x=X,y=Y,label=label),
                            nudge_y = 10, 
                            segment.size = .1) +
           geom_label_repel(data = early_hotspots_centroid.df[2,],
                            aes(x=X,y=Y,label=label),
                            nudge_x = 10,
                            nudge_y = 1,
                            segment.size = .1) +
           theme_bw() +
           labs(fill = "excess mortality", x = NULL, y = NULL),
         
         ggplot(hex_ita_10km_excess_mortality.sf %>% 
                  st_transform(4326) %>%
                  dplyr::filter(!is.na(early2_median))) +
           geom_sf(aes(fill = cut(early2_median, c(-Inf,4,5,6,7,8,Inf))), colour = NA) +
           scale_fill_brewer(palette = "Reds", 
                             direction = 1, na.value = "white",
                             drop = FALSE) + 
           geom_sf(data = lockdown_23feb.sf, fill = NA, size = .35, 
                   colour = "#31a354") +
           geom_sf(data = ita_reg.sf,
                   fill = NA,
                   colour = 'grey', size = .2, alpha = .5) +
           geom_sf(data = ita_osm_roads.sf, 
                   size = .15, alpha = .25) +
           coord_sf(ylim = c(43.6, 47.09416), xlim = c(6.612583,14)) +
           geom_label_repel(data = lockdown_23feb_centroid.df[1,],
                            aes(x=X,y=Y,label=label),
                            nudge_x = - 10,
                            nudge_y = 10, 
                            segment.size = .1) +
           geom_label_repel(data = lockdown_23feb_centroid.df[2,],
                            aes(x=X,y=Y,label=label),
                            nudge_x = + 10,
                            nudge_y = 10, 
                            segment.size = .1) +
           geom_label_repel(data = early_hotspots_centroid.df[1,],
                            aes(x=X,y=Y,label=label),
                            nudge_y = 10, 
                            segment.size = .1) +
           geom_label_repel(data = early_hotspots_centroid.df[2,],
                            aes(x=X,y=Y,label=label),
                            nudge_x = 10,
                            nudge_y = 1,
                            segment.size = .1) +
           theme_bw() +
           labs(fill = "excess mortality", x = NULL, y = NULL),
         nrow = 2))

over5sd_hex_id_1stwave <- 
  model_vars.df %>%
  dplyr::filter(day >= as.Date("2020-02-01")) %>%
  dplyr::filter(day < as.Date("2020-05-01")) %>%
  dplyr::group_by(hex_id) %>%
  dplyr::summarise(days_over5sd = sum(dist_sd_contact2death>5)>=10)

over5sd_hex_id_2ndwave <- 
  model_vars.df %>%
  dplyr::filter(day >= as.Date("2020-09-01")) %>%
  dplyr::group_by(hex_id) %>%
  dplyr::summarise(days_over5sd = sum(dist_sd_contact2death>5)>=10)

hex_ita_10km_excess_mortality.sf$days_over5sd_1stwave <- 
  over5sd_hex_id_1stwave$days_over5sd[match(hex_ita_10km_excess_mortality.sf$hex_id, 
                                    over5sd_hex_id_1stwave$hex_id)]

hex_ita_10km_excess_mortality.sf$days_over5sd_2ndwave <- 
  over5sd_hex_id_2ndwave$days_over5sd[match(hex_ita_10km_excess_mortality.sf$hex_id, 
                                    over5sd_hex_id_2ndwave$hex_id)]


plot_row_1st_wave <- 
  plot_grid(ggplot(hex_ita_10km_excess_mortality.sf) +
              geom_sf(aes(fill = peak_median), colour = NA) +
              scale_fill_distiller(palette = "Reds", direction = 1, na.value = "white") +
              geom_sf(data = ita_reg.sf,
                      fill = NA,
                      colour = 'white', size = .2, alpha = .5) +
              theme_bw() +
              labs(fill = "excess mortality") +
              theme(legend.position = 'bottom'),
            ggplot(hex_ita_10km_excess_mortality.sf %>%
                     dplyr::filter(!is.na(P1))) +
              geom_sf(aes(fill = days_over5sd_1stwave), colour = NA) +
              scale_fill_brewer(palette = "Set2", labels = c("<10", "+10")) + 
              geom_sf(data = ita_reg.sf,
                      fill = NA,
                      colour = 'white', size = .2, alpha = .5) +
              # geom_sf(data = ita_osm_roads.sf, 
              #         size = .15, alpha = .25, colour = "gray40") +
              theme_bw() +
              labs(fill = "Days with excess mortality") +
              theme(legend.position = 'bottom'))

plot_row_2nd_wave <- 
  plot_grid(
    ggplot(hex_ita_10km_excess_mortality.sf) +
      geom_sf(aes(fill = peak_median_2nd_wave), colour = NA) +
      scale_fill_distiller(palette = "Reds", direction = 1, na.value = "white") +
      geom_sf(data = ita_reg.sf,
              fill = NA,
              colour = 'white', size = .2, alpha = .5) +
      theme_bw() +
      labs(fill = "excess mortality") +
      theme(legend.position = 'bottom'),
    ggplot(hex_ita_10km_excess_mortality.sf %>%
             dplyr::filter(!is.na(P1))) +
      geom_sf(aes(fill = days_over5sd_2ndwave), colour = NA) +
      scale_fill_brewer(palette = "Set2", labels = c("<10", "+10")) + 
      geom_sf(data = ita_reg.sf,
              fill = NA,
              colour = 'white', size = .2, alpha = .5) +
      # geom_sf(data = ita_osm_roads.sf, 
      #         size = .15, alpha = .25, colour = "gray40") +
      theme_bw() +
      labs(fill = "Days with excess mortality") +
      theme(legend.position = 'bottom'))

title_1st_wave <- 
  ggdraw() + 
  draw_label(
    "First wave (Feb - Apr 2020)",
    fontface = 'bold',
    x = 0,
    hjust = 0
  ) +
  theme(
    plot.margin = margin(0, 0, 0, 7)
  )

title_2nd_wave <- 
  ggdraw() + 
  draw_label(
    "Second wave (Oct - Dec 2020)",
    fontface = 'bold',
    x = 0,
    hjust = 0
  ) +
  theme(
    plot.margin = margin(0, 0, 0, 7)
  )

ggsave(file = "../fig/excess_mortality_map.pdf", width = 14, height = 18,
       plot_grid(
         plot_grid(title_1st_wave, 
                   plot_row_1st_wave, ncol = 1, rel_heights = c(0.1, 1)),
         plot_grid(title_2nd_wave, 
                   plot_row_2nd_wave, ncol = 1, rel_heights = c(0.1, 1)),
         nrow = 2))

# ggplot(hex_ita_10km_excess_mortality.sf %>%
#          dplyr::filter(!is.na(P1)) %>%
#          st_transform(4326)) +
#   geom_sf(aes(fill = value), colour = NA, alpha = .5) +
#   scale_fill_brewer(palette = "Set2", labels = c("<10", "+10")) + 
#   geom_sf(data = ita_roads.sf %>% dplyr::filter(grepl("^A",ref)), 
#           colour = "darkgray", alpha = .95, size = .4) +
#   geom_sf(data = ita_reg.sf,
#           fill = NA,
#           colour = 'white', size = .2, alpha = .5) +
#   coord_sf(ylim = c(43.6, 47.09416), xlim = c(6.612583,14)) +
#   theme_bw() +
#   guides(fill = FALSE)

```

## Testing variables

Testing data are based on official daily figures published by the Italian government ([link](https://github.com/pcm-dpc/COVID-19/)) and reported at the level of region. The first day for which figures are available is `r format(min(reg_testing.df$date), "%d %B %Y")`. The case-to-test ratio is computed dividing the 7-day center-aligned moving average of number of positive tests divided by the  7-day center-aligned moving average of the number of tests. 

The following figure shows the evolution of the case-test ratio in the 20 Italian regions.

```{r}
reg_testing.df$case_test_ratio <- 
  reg_testing.df$cases_ma7 / reg_testing.df$tests_ma7

ggplot(reg_testing.df, aes(x=date, y=case_test_ratio)) +
  geom_line() +
  scale_y_continuous(labels = function(x)paste0(round(x*100),"%")) +
  facet_wrap(denominazione_regione~.) + theme_bw() + 
  labs(x = "case-test ratio", y=NULL)

model_vars.df <- 
  merge(model_vars.df, reg_testing.df, 
        by.x = c("CODREG", "day"), 
        by.y = c("codice_regione", "date"),
        all.x = TRUE)
```

## Lockdown

For each hexagon cell, the percentage of the population estimated to be living under lockdown measures in computed through the following steps: 

1. The areas resulting from the union of the area of the *comune* targetted by the lockdown measures established between 23 February and 31 December 2020. 
2. For each hexagon cell the percentage of the population under lockdown measures was estimated to be 0\% (100\%) if the area of the cell was included (excluded) entirely by the two lockdown areas.
3. When an hexagon cell was only partially covered by one of the two lockdown areas, the percentage of the population under lockdown measures was estimated by 
  1. summing the count of the population of all the census sections pertaining to a *comune* under lockdown measures and, based on the census section's centroid, intersecting with the hexagon cell. 
  2. dividing the resulting sum for the entire population estimated to live in the area of the hexagon cell (see above).

```{r fig.cap = "Number of Italian comune under lockdown measures"}

toprow.p <- 
  data.frame(date = as.Date(paste(colnames(lockdown.m), "2020"), format = "%m%d%Y"),
             n = apply(lockdown.m, 2, sum)) %>%
  ggplot(aes(x = date, y = n)) +
  geom_line() +
  theme_bw() +
  labs(x = NULL, y = "n. comune")


bottomrow.p <- 
  plot_grid(  
    data.frame(date = as.Date(paste(colnames(lockdown.m), "2020"), format = "%m%d%Y"),
               n = apply(lockdown.m, 2, sum)) %>%
      dplyr::filter(date > as.Date("2020-02-15"), 
                    date <= as.Date("2020-03-09")) %>%
      ggplot(aes(x = date, y = n)) +
      geom_bar(stat = 'identity') +
      theme_bw() +
      labs(x = NULL, y = "n. comune"),
    
    data.frame(date = as.Date(paste(colnames(lockdown.m), "2020"), format = "%m%d%Y"),
               n = apply(lockdown.m, 2, sum)) %>%
      dplyr::filter(date > as.Date("2020-10-15"), 
                    date <= as.Date("2020-12-31")) %>%
      ggplot(aes(x = date, y = n)) +
      geom_bar(stat = 'identity') +
      theme_bw() +
      labs(x = NULL, y = "n. comune"))


plot_grid(toprow.p, bottomrow.p, ncol = 1)

```


The following *comune* were subjected to lockdown measures on 23 February 2020: `r cat(paste(italy_comune.sf$COMUNE[italy_comune.sf$PRO_COM_T %in% rownames(lockdown.m)[lockdown.m[,"0223"] == 1]], collapse = ", "))`.

```{r include=T}
italy_comune.sf %>%
  dplyr::filter(PRO_COM_T %in% 
                  rownames(lockdown.m)[lockdown.m[,"0223"] == 1]) %>%
  st_transform(4326) %>%
  ggplot() +
  geom_sf(fill = "red", colour = 'red') +
  geom_sf(data = ita_reg.sf %>% 
            st_transform(4326), fill = NA) +
  theme_bw() +
  coord_sf(xlim = c(8.51475, 12.5), 
           ylim = c(44.69240, 46.58386)) +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank())
```

All the *comune* in Lombardy and in the the following *provincia*: Alessandria, Asti, Modena, Novara, Padua, Parma, Piacenza, Pesaro and Urbino, Reggio Emilia, Rimini, Treviso, Venice, Verbano-Cusio-Ossola, were subjected to lockdown measures on 08 March 2020.

```{r include=T}
italy_comune.sf %>%
  dplyr::filter(PRO_COM_T %in% 
                  rownames(lockdown.m)[lockdown.m[,"0308"] == 1]) %>%
  st_transform(4326) %>%
  ggplot() +
  geom_sf(fill = "red", colour = 'red') +
  geom_sf(data = ita_reg.sf %>% 
            st_transform(4326), fill = NA) +
  theme_bw() +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank())
```
                      

```{r, fig.cap = "Comuni under lockdown measures on 06 November 2020 and 06 December 2020"}
plot_grid(
  italy_comune.sf %>%
  dplyr::filter(PRO_COM_T %in% 
                  rownames(lockdown.m)[lockdown.m[,"1106"] == 1]) %>%
  st_transform(4326) %>%
  ggplot() +
  geom_sf(fill = "red", colour = 'red') +
  geom_sf(data = ita_reg.sf %>% 
            st_transform(4326), fill = NA) +
  theme_bw() +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank()),
    italy_comune.sf %>%
  dplyr::filter(PRO_COM_T %in% 
                  rownames(lockdown.m)[lockdown.m[,"1206"] == 1]) %>%
  st_transform(4326) %>%
  ggplot() +
  geom_sf(fill = "red", colour = 'red') +
  geom_sf(data = ita_reg.sf %>% 
            st_transform(4326), fill = NA) +
  theme_bw() +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank()),
  ncol = 2
)
```


```{r}

model_vars.df$perc_pop_lockdown <- 0



hex_pop_lockdown <-
  hex_pop_lockdown[,c("hex_id", "P1", 
                      names(hex_pop_lockdown[,3:ncol(hex_pop_lockdown)])[order(names(hex_pop_lockdown[,3:ncol(hex_pop_lockdown)]))])]


for (i in 3:(ncol(hex_pop_lockdown)-1)) {
  
  print(i)
  
  this_from <- 
    as.Date(paste0("2020", names(hex_pop_lockdown)[i]), format = "%Y%m%d")
  
  this_to <-
    as.Date(paste0("2020", names(hex_pop_lockdown)[i+1]), format = "%Y%m%d") - 1
  
  this_dat <- 
    hex_pop_lockdown[,c(1, 2, i)]
  
  this_dat$perc_pop_lockdown <-
    this_dat[[3]] / this_dat[[2]]
  
  this_dat$perc_pop_lockdown[!is.finite(this_dat$perc_pop_lockdown)] <- 0 
  
  these_days <- 
    seq(this_from, this_to, by = '1 day')
  
  for (this_hex_id in unique(this_dat$hex_id[this_dat$perc_pop_lockdown>0])) {

    this_perc <-
      this_dat$perc_pop_lockdown[this_dat$hex_id == this_hex_id]

    model_vars.df$perc_pop_lockdown[
      model_vars.df$hex_id == this_hex_id &
        model_vars.df$day %in% these_days
    ] <-
      this_perc

  }
  
}

# First total lockdown

model_vars.df$perc_pop_lockdown[
  model_vars.df$day %in% 
    seq(as.Date("2020-03-10"), 
        as.Date("2020-05-03"), 
        by = 'day')] <- 1

model_vars.df$perc_pop_lockdown[
  model_vars.df$day %in% 
    seq(as.Date("2020-12-24"), 
        as.Date("2020-12-25"), 
        by = 'day')] <- 1

model_vars.df %>% 
  group_by(day) %>% 
  summarize(ld = sum(perc_pop_lockdown>0)) %>%
  ggplot(aes(x=day,y=ld)) +
  geom_bar(stat='identity')

model_vars.df$perc_pop_out_lockdown <- 
  1 - model_vars.df$perc_pop_lockdown

model_vars.df$bin_pop_out_lockdown <- 
  ifelse(model_vars.df$perc_pop_out_lockdown < .5,
         0, 1)

model_vars.df %>% 
  group_by(day) %>% 
  summarize(ld = sum(perc_pop_out_lockdown>.5)) %>%
  ggplot(aes(x=day,y=ld)) +
  geom_bar(stat='identity')

model_vars.df %>% 
  group_by(day) %>% 
  summarize(ld = sum(bin_pop_out_lockdown)) %>%
  ggplot(aes(x=day,y=ld)) +
  geom_bar(stat='identity')

## Days since first total lockdown

model_vars.df <- 
  model_vars.df %>%
  dplyr::group_by(hex_id) %>%
  dplyr::arrange(day) %>%
  dplyr::mutate(dist_tot_lockdown = 
                  seq(1 - which(perc_pop_lockdown == 1)[1], 
                      n() - which(perc_pop_lockdown == 1)[1]))

```

## Commuter-weighted network

A network connecting the hexagon cells and weighted based on the number of people communiting between pairs of cells is estimated based on a *comune*-to-*comune* asymmetric matrix populated with the count of commuters on census day 2011.

Consistently with the approach followed in computing other variables, we proceeded as following:

1. For each *comune*, we computed how its 2011 population distributes across different hexagon cells so to have the following table

| Comune ID | Hexagonal cell ID | Population prop. |
|:---------:|:-----------------:|:----------------:|
|     1     |         1         |        50%       |
|     1     |         2         |        50%       |
|     2     |         1         |        70%       |
|     2     |         2         |        30%       |
|     3     |         3         |        10%       |
|     3     |         4         |        10%       |
|     3     |         5         |        80%       |

2. We joined the resulting table with the long version version of the asymmetric matrix, so to have the following table

| From comune ID | From hexagonal cell ID | From population prop. | To comune ID | N. commuters |
|:--------------:|:----------------------:|:---------------------:|:------------:|:------------:|
|        1       |            1           |          50%          |       2      |      10      |
|        1       |            1           |          50%          |       2      |      10      |
|        1       |            1           |          50%          |       2      |      10      |
|        1       |            1           |          50%          |       2      |      10      |
|        2       |            1           |          70%          |       3      |      15      |

3. We combined this last table with the table generated in the first step joining on `to Comune ID = Comune ID` resulting in 

| ... | From hex. ID | From pop. % | ... | N. commuters | To hex. ID | To pop. % |
|:---:|:------------:|:-----------:|:---:|:------------:|:----------:|:---------:|
|  1  |       1      |     50%     |  2  |      10      |      2     |    30%    |
|  1  |       1      |     50%     |  2  |      10      |      3     |    10%    |
|  1  |       1      |     50%     |  2  |      10      |      4     |    20%    |
|  1  |       1      |     50%     |  2  |      10      |      5     |    10%    |
|  2  |       1      |     70%     |  3  |      15      |      4     |    100%   |

4. For each row of the resulting table, we then computed $\texttt{weighted commuting} = \texttt{n. commuters} \times \texttt{from pop. \%} \times \texttt{to pop. \%}$ as the relationship between a pair of hexagonal cells definited by the communiting from  $comune_A$ to $comune_B$.

5. For each pair of hexagon cells $A$ and $B$, we sum all the resulting $\texttt{weighted commuting}$ to obtain  $\texttt{weighted commuting}_{\vec{AB}}$ (which is different from $\texttt{weighted commuting}_{\vec{BA}}$).

6. We created direct network mapping the asymmetric relationships between pairs of hexagonal cells, where each relation was assigned weight $\texttt{weighted commuting}_{\vec{AB}}$

7. We simplify the directed network to obtain the undirected network with symmetrical relationship between pairs of hexagonal cell and weight $\texttt{weighted commuting}_{\overline{AB}}$ resulting from $\texttt{weighted commuting}_{\vec{AB}} + \texttt{weighted commuting}_{\vec{BA}}$ illustrated by the following Figure. 

![](../fig/hex_commuters_network.pdf)

The following Figure shows the same network and the communities identified by a community detection algorithm (A Clauset, MEJ Newman, C Moore: Finding community structure in very large networks, http://www.arxiv.org/abs/cond-mat/0408187).

![](../fig/hex_commuters_network_north.pdf)

## Neighboring excess mortality

We computed neighboring excess mortality based on the commuter-weighted network detailed above. For each day and for each cell, neighboring excess mortality was the average excess mortality in the other cells on that day wighted according to the corresponding $\texttt{weighted commuting}_{\overline{AB}}$.

```{r}

hex_commuting_net.el <- 
  as.data.frame(as_edgelist(hex_commuting_net.g, names = TRUE))
hex_commuting_net.el$n <- 
  E(hex_commuting_net.g)$n

funNeighNet <- function(hex_id, hex_commuting_net.el, model_vars.df) {
  
  require(dplyr)
  
  res.df <- 
    data.frame()
  base_el.df <- 
    hex_commuting_net.el[hex_commuting_net.el$V1 == hex_id |
                           hex_commuting_net.el$V2 == hex_id,]
  
  if(nrow(base_el.df) == 0) {return()}
  
  base_el.df$from <- 
    hex_id
  base_el.df$to <- 
    apply(base_el.df[,1:2], 1, 
          FUN = function(x) c(x)[which(c(x) != hex_id)])
  
  all_days.df <- 
    model_vars.df[model_vars.df$hex_id %in% base_el.df$to,
                  c("hex_id",
                    "day",
                    "dist_sd_contact2death")]
  
  all_days.df$n <- 
      base_el.df$n[match(all_days.df$hex_id,
                         base_el.df$to)]
  
  res.df <- 
    all_days.df %>%
    dplyr::group_by(day) %>%
    dplyr::summarise(neigh_weighted_mean = 
                       weighted.mean(dist_sd_contact2death,
                                     w = n))
  
  res.df$hex_id <- hex_id
  
  return(res.df)
}

funNeigh1 <- function(i, hex_ita_10km.nb, all_days, model_vars.df) {
  res.df <- data.frame()
  these_neigh <- hex_ita_10km.nb[[i]]
  for (j in 1:length(all_days)) {
    this.df <- 
      model_vars.df[model_vars.df$day == all_days[j] &
                      model_vars.df$hex_id %in% these_neigh,
                    c("dist_sd_contact2death", 
                      "perc_pop_out_lockdown",
                      "P1")]
    res.df <- rbind(res.df, 
                    data.frame(hex_id = i,
                               day = all_days[j],
                               neigh_weighted_mean = 
                                 weighted.mean(this.df$dist_sd_contact2death,
                                               w = this.df$P1)))
  }
  return(res.df)
}

cl <- makeCluster(10)
neigh.list <- 
  parLapply(cl, hex_ita_10km.df$hex_id, funNeighNet, 
            hex_commuting_net.el, model_vars.df)
neigh.df <- bind_rows(neigh.list)
stopCluster(cl)

model_vars.df <- 
  merge(model_vars.df, 
        neigh.df,
        by = c("hex_id","day"),
        all.x = TRUE)

model_vars.df$neigh_weighted_mean[is.na(model_vars.df$neigh_weighted_mean)] <- 0
```

## Road density

The road density variable is computed based on the length of primary roads and based on OpenStreetMap data. For each hexagon cell, the total length of is computed by summing all the road segements contained in teh cell. Original data and length by cell is reported in the following figure.

![](../fig/osm_primary_road.png)

```{r}
model_vars.df$road_length <- 
  primary_road_hex.df$tot_length[match(model_vars.df$hex_id,
                                       primary_road_hex.df$hex_id)]

funNeigh2 <- function(i, hex_ita_10km.nb, primary_road_hex.df) {
  these_neigh <- 
    hex_ita_10km.nb[[i]]
  this_length <- 
    sum(primary_road_hex.df$tot_length[c(i,these_neigh)])
  return(data.frame(hex_id = i,
                    neigh_sum_road_length = this_length))
}

hex_ita_10km.nb <- 
  poly2nb(hex_ita_10km.sf, queen=TRUE)

cl <- makeCluster(10)
neigh.list <- 
  parLapply(cl, hex_ita_10km.df$hex_id, funNeigh2, 
            hex_ita_10km.nb, primary_road_hex.df)
neigh.df <- bind_rows(neigh.list)
stopCluster(cl)

model_vars.df$neigh_sum_road_length <- 
  neigh.df$neigh_sum_road_length[match(model_vars.df$hex_id,
                                       neigh.df$hex_id)]

model_vars.df$neigh_sum_road_length_sqrt <- 
  sqrt(model_vars.df$neigh_sum_road_length)
```


## Hotel rooms 

For each cell, we computed the number of hotel rooms based on data reported by Istat at the level of *comune* and according to the mapping method for count variables illustrated above.

```{r}


model_vars.df$hotel_beds <- 
  hex_beds.df$hotel_beds[match(model_vars.df$hex_id,
                               hex_beds.df$hex_id)]

model_vars.df$hotel_beds[is.na(model_vars.df$hotel_beds)] <- 0

model_vars.df$hotel_beds_log <- log(model_vars.df$hotel_beds + 1)
```


```{r Smokers}
cod_reg.df <- 
  data.frame(CODREG = hex_ita_10km.sf$CODREG,
             REGIONE = hex_ita_10km.sf$REGIONE,
             stringsAsFactors = F) %>%
  dplyr::distinct(CODREG, REGIONE) %>%
  dplyr::filter(!is.na(CODREG)) %>%
  dplyr::arrange(REGIONE)

dat_smokers <- 
  read.csv("../smokers/DCCV_AVQ_PERSONE_19062020060907198.csv") %>%
  dplyr::filter(grepl("IT([A-Z])(\\d+|[A-Z])", ITTER107) &
                  !ITTER107 %in% c("ITFG","ITCD","ITD1","ITD2") & 
                  Tipo.dato == "fumatori" &
                  Misura == "per 100 persone con le stesse caratteristiche" &
                  TIME == 2019) %>%
  dplyr::arrange(Territorio)

dat_smokers$CODREG <- 
  as.numeric(cod_reg.df$CODREG)

model_vars.df$smokers <- 
  dat_smokers$Value[match(model_vars.df$CODREG, dat_smokers$CODREG)]

```


```{r Health status}
dat_health_status <- 
  read.csv("../health_status_reg/DCCV_AVQ_PERSONE_19062020065748989.csv") %>%
  dplyr::filter(grepl("IT([A-Z])(\\d+|[A-Z])", ITTER107) &
                  !ITTER107 %in% c("ITFG","ITCD","ITD1","ITD2") & 
                  Misura == "per 100 persone con le stesse caratteristiche" &
                  TIME == 2018) %>%
  dplyr::select(-Tipo.dato, -Misura, -TIME, -Seleziona.periodo, 
                -Flag.Codes, -Flags, -MISURA_AVQ) %>%
  tidyr::pivot_wider(names_from = TIPO_DATO_AVQ, values_from = Value)

dat_health_status$CODREG <- 
  dat_smokers$CODREG[match(dat_health_status$ITTER107, dat_smokers$ITTER107)]

colnames(dat_health_status) <- 
  gsub("0_", "", colnames(dat_health_status))

cancer_ita_reg_2019 <- 
  read_excel("../health_status_reg/2019_cancro_ita_reg.xlsx")

regioni_pop_2019 <- 
  read.csv("../health_status_reg/regioni.csv", skip = 1) %>%
  dplyr::filter(Età == "Totale") %>%
  dplyr::select(Regione, Totale.Femmine, Totale.Maschi) %>%
  dplyr::mutate(CODREG = 1:20)

cancer_ita_reg_2019 <- 
  merge(cancer_ita_reg_2019, regioni_pop_2019, by = "CODREG")

cancer_ita_reg_2019 <-
  cancer_ita_reg_2019 %>%
  dplyr::mutate(perc_breast_cancer = 
                  mammella_f / Totale.Femmine,
                perc_colorectal_cancer = 
                  (colonretto_m + colonretto_f) / 
                  (Totale.Femmine + Totale.Maschi),
                perc_lung_cancer = 
                  (polmone_m + polmone_f) / 
                  (Totale.Femmine + Totale.Maschi))

dat_health_status <- 
  merge(dat_health_status, 
        cancer_ita_reg_2019 %>% 
          dplyr::select(CODREG, 
                        perc_breast_cancer, 
                        perc_colorectal_cancer, 
                        perc_lung_cancer), by = "CODREG")

model_vars.df <- 
  merge(model_vars.df,
        dat_health_status %>% dplyr::select(-ITTER107, -Territorio), 
        by = "CODREG")
```

## Icu beds

```{r}
load("../icu/address_unique.df.RData")
```


The number of ICU beds ("terapia intensiva") was computed based on a list of health centres compiled by the Ministry of Health and containing the street address. From that list, we geocoded `r nrow(address_unique.df)` street address with at least one ICU beds using the Google Maps API. The distribution of ICU beds is mapped in the following Figure.

![](../fig/icu_beds.pdf)


```{r}
hex_ita_10km.sf$icu_beds <- 0
hex_ita_10km.sf$icu_beds <- 
  hex_icu_beds.df$icu_beds[match(hex_ita_10km.sf$hex_id,
                                 hex_icu_beds.df$hex_id)]
hex_ita_10km.sf$icu_beds[is.na(hex_ita_10km.sf$icu_beds)] <- 0

names(hex_ita_10km.nb) <- hex_ita_10km.sf$hex_id

g <- 
  graph_from_adj_list(
    hex_ita_10km.nb[which(unlist(lapply(hex_ita_10km.nb, function(x)length(x)>0)))], 
                      mode="all", 
    duplicate=TRUE)

neigh5 <- 
  ego(g, order = 5, nodes = V(g))

funNeighIcu <- function(i) {
  hex_ids <- neigh5[[i]]
  return(sum(hex_ita_10km.sf$icu_beds[hex_ids], na.rm = T) /
    sum(hex_ita_10km.sf$P1[hex_ids], na.rm = T))
}

hex_ita_10km.sf$perc_neigh5_icu_beds <- 
  sapply(hex_ita_10km.sf$hex_id, funNeighIcu)
hex_ita_10km.sf$perc_neigh5_icu_beds[is.na(hex_ita_10km.sf$perc_neigh5_icu_beds)] <- 0
  
model_vars.df$perc_neigh5_icu_beds <- 
  hex_ita_10km.sf$perc_neigh5_icu_beds[match(model_vars.df$hex_id,
                                             hex_ita_10km.sf$hex_id)]

```

```{r Income}

hex_income.df <- 
  hex_income.df %>%
  dplyr::mutate(perc_low_income = 
                  ( X0 + X10000 ) /
                  (X0 + X10000 + X15000 + X26000 + X55000 + X75000 + X120000 + X120000.))

model_vars.df$perc_low_income <- 
  hex_income.df$perc_low_income[match(model_vars.df$hex_id, 
                                      hex_income.df$hex_id)]
```

```{r}
save(model_vars.df, file = "model_vars.df.RData")
```

# Modelling

## Standardised coefficient table

```{r}
hex_wt_missing_value <- 
  unique(model_vars.df$hex_id[is.infinite(model_vars.df$dist_sd_contact2death)])

hex_wt_missing_value <- 
  unique(model_vars.df$hex_id[is.infinite(model_vars.df$dist_sd_lag4_contact2death)])
```


```{r}
model_vars_std.df <-
  model_vars.df %>%
  dplyr::select(hex_id, CODREG, day, perc_pop_lockdown, dist_tot_lockdown,
                dist_sd_contact2death, dist_sd_lag4_contact2death, neigh_weighted_mean,

                no_conc, no2_conc, o3_conc, pm2p5_conc, pm10_conc, co_conc,  so2_conc,
                
                era5_temp, era5_rhum, era5_prec,
                
                perc_breast_cancer, perc_colorectal_cancer, perc_lung_cancer, 
                CHRONIC_DIAB, CHRONIC_HYP, CHRONIC_BRONC, CHRONIC_ALLER, CHRONIC_HEART,
                smokers,
                
                P1, perc_female, perc_over_65, pop_over_65, perc_work_outside_comune, 
                perc_employed, perc_unemployed, perc_degree_over_workforce,
                perc_families_over_4, 
                gini_over74,
                median_density_km2, 
                
                hotel_beds_log,
                road_length,
                
                sum_agedcare_log10, sum_construction_log10, sum_delivery_log10,
                sum_education_log10, sum_healthcare_log10, sum_hospitality_log10,
                sum_manufacturing.ex_meat_log10, sum_manufacturing.meat_log10,
                sum_other_log10, sum_retail_log10, sum_services_log10, sum_transport_log10,
                sum_warehousing_log10, sum_wholesaling_log10, 
                perc_low_income, 
                perc_neigh5_icu_beds, case_test_ratio) %>%
  dplyr::mutate_at(vars(dist_sd_contact2death:case_test_ratio), scale)

pooled_pm2p5 <- 
  lm(dist_sd_contact2death ~ 
       dist_sd_lag4_contact2death * 
       perc_pop_lockdown *
       neigh_weighted_mean +
 
       pm2p5_conc + no_conc + o3_conc + 
       co_conc + no2_conc + so2_conc +
       era5_temp + era5_rhum + era5_prec + 
       
       perc_breast_cancer + perc_colorectal_cancer + perc_lung_cancer +
       CHRONIC_DIAB + CHRONIC_HYP + CHRONIC_BRONC + CHRONIC_ALLER + CHRONIC_HEART +
       smokers +
       
       perc_neigh5_icu_beds + case_test_ratio +

       median_density_km2 + 
       
       perc_female + perc_over_65 + gini_over74 + 
       perc_work_outside_comune +
       perc_families_over_4 + 
       perc_employed + perc_degree_over_workforce +
       perc_low_income +
       
       sum_agedcare_log10 + sum_construction_log10 + sum_delivery_log10 +
       sum_education_log10 + sum_healthcare_log10 + sum_hospitality_log10 +
       sum_manufacturing.ex_meat_log10 + sum_manufacturing.meat_log10 +
       sum_other_log10 + sum_retail_log10 + sum_services_log10 + sum_transport_log10 +
       sum_warehousing_log10 + sum_wholesaling_log10 + 
       
       hotel_beds_log + 
       road_length, 
     data=model_vars_std.df)

pooled_pm10 <- 
  lm(dist_sd_contact2death ~ 
       dist_sd_lag4_contact2death * 
       perc_pop_lockdown *
       neigh_weighted_mean +
 
       pm10_conc + no_conc + o3_conc + 
       co_conc + no2_conc + so2_conc +
       era5_temp + era5_rhum + era5_prec + 
       
       perc_breast_cancer + perc_colorectal_cancer + perc_lung_cancer +
       CHRONIC_DIAB + CHRONIC_HYP + CHRONIC_BRONC + CHRONIC_ALLER + CHRONIC_HEART +
       smokers +
       
       perc_neigh5_icu_beds + case_test_ratio +

       median_density_km2 + 
       
       perc_female + perc_over_65 + gini_over74 + 
       perc_work_outside_comune +
       perc_families_over_4 + 
       perc_employed + perc_degree_over_workforce +
       perc_low_income +
       
       sum_agedcare_log10 + sum_construction_log10 + sum_delivery_log10 +
       sum_education_log10 + sum_healthcare_log10 + sum_hospitality_log10 +
       sum_manufacturing.ex_meat_log10 + sum_manufacturing.meat_log10 +
       sum_other_log10 + sum_retail_log10 + sum_services_log10 + sum_transport_log10 +
       sum_warehousing_log10 + sum_wholesaling_log10 + 
       
       hotel_beds_log + 
       road_length, 
     data=model_vars_std.df)
```


```{r fig.width=10, fig.height=13, eval=F}
vars <- names(pooled$coefficients)
vars <- vars[!grepl("(Intercept)|:", vars)]

funPlot <- function(var) {
  ggplot(model_vars_std.df) + geom_density(aes_string(var)) + theme_bw()
}

plotlist <- lapply(vars, funPlot)
do.call("grid.arrange", c(plotlist, ncol=5))
```


```{r Multilevel}
library(lme4)
multilevel_pm2p5 <- 
  lmer(formula = dist_sd_contact2death ~ 
         dist_sd_lag4_contact2death * 
       perc_pop_lockdown *
       neigh_weighted_mean +
 
       pm2p5_conc + no_conc + o3_conc + 
       co_conc + no2_conc + so2_conc +
       era5_temp + era5_rhum + era5_prec + 
       
       perc_breast_cancer + perc_colorectal_cancer + perc_lung_cancer +
       CHRONIC_DIAB + CHRONIC_HYP + CHRONIC_BRONC + CHRONIC_ALLER + CHRONIC_HEART +
       smokers +
       
       perc_neigh5_icu_beds + case_test_ratio +

       median_density_km2 + 
       
       perc_female + perc_over_65 + gini_over74 + 
       perc_work_outside_comune +
       perc_families_over_4 + 
       perc_employed + perc_degree_over_workforce +
       perc_low_income +
       
       sum_agedcare_log10 + sum_construction_log10 + sum_delivery_log10 +
       sum_education_log10 + sum_healthcare_log10 + sum_hospitality_log10 +
       sum_manufacturing.ex_meat_log10 + sum_manufacturing.meat_log10 +
       sum_other_log10 + sum_retail_log10 + sum_services_log10 + sum_transport_log10 +
       sum_warehousing_log10 + sum_wholesaling_log10 + 
       
       hotel_beds_log + 
       road_length +
         (1|CODREG),
       data = model_vars_std.df)

multilevel_pm10 <- 
  lmer(formula = dist_sd_contact2death ~ 
         dist_sd_lag4_contact2death * 
       perc_pop_lockdown *
       neigh_weighted_mean +
 
       pm10_conc + no_conc + o3_conc + 
       co_conc + no2_conc + so2_conc +
       era5_temp + era5_rhum + era5_prec + 
       
       perc_breast_cancer + perc_colorectal_cancer + perc_lung_cancer +
       CHRONIC_DIAB + CHRONIC_HYP + CHRONIC_BRONC + CHRONIC_ALLER + CHRONIC_HEART +
       smokers +
       
       perc_neigh5_icu_beds + case_test_ratio +

       median_density_km2 + 
       
       perc_female + perc_over_65 + gini_over74 + 
       perc_work_outside_comune +
       perc_families_over_4 + 
       perc_employed + perc_degree_over_workforce +
       perc_low_income +
       
       sum_agedcare_log10 + sum_construction_log10 + sum_delivery_log10 +
       sum_education_log10 + sum_healthcare_log10 + sum_hospitality_log10 +
       sum_manufacturing.ex_meat_log10 + sum_manufacturing.meat_log10 +
       sum_other_log10 + sum_retail_log10 + sum_services_log10 + sum_transport_log10 +
       sum_warehousing_log10 + sum_wholesaling_log10 + 
       
       hotel_beds_log + 
       road_length +
         (1|CODREG),
       data = model_vars_std.df)
```

```{r, results = 'asis', include = T}

coef_1 <- 
  names(coef(pooled_pm2p5))
coef_1.df <- 
  data.frame(coef = coef_1)
coef_1.df$labs <- 
  predictor_labels[match(coef_1, names(predictor_labels))]

coef_1.df$labs[coef_1.df$coef == "(Intercept)"] <- "(Intercept)"
coef_1.df$labs[coef_1.df$coef == "perc_pop_lockdown"] <- "perc. lockdown"

coef_2 <- 
  names(coef(pooled_pm10))
coef_2.df <- 
  data.frame(coef = coef_2)
coef_2.df$labs <- 
  predictor_labels[match(coef_2, names(predictor_labels))]

coef_2.df$labs[coef_2.df$coef == "(Intercept)"] <- "(Intercept)"
coef_2.df$labs[coef_2.df$coef == "perc_pop_lockdown"] <- "perc. lockdown"

stargazer(pooled_pm2p5, pooled_pm10, multilevel_pm2p5, multilevel_pm10, 
          dep.var.labels = "Excess mortality (backdated by 17 days)",
          header=FALSE, type='latex',
          covariate.labels = c(coef_1.df$labs[2:5], coef_2.df$labs[5:nrow(coef_2.df)]),
          single.row=TRUE, 
          font.size = "scriptsize", column.sep.width = "-15pt")
```

```{r}
stargazer(pooled_pm2p5, pooled_pm10, multilevel_pm2p5, multilevel_pm10, 
          type = 'html', dep.var.labels = "Excess mortality (backdated by 17 days)",
          out = '../fig/regression_table.html',
          covariate.labels = c(coef_1.df$labs[2:5],coef_2.df$labs[5:nrow(coef_2.df)]),
          single.row=TRUE)
```


\pagebreak

## Standardised coefficient plot

```{r}
pooled_pm2p5_coef.df <- 
  as.data.frame(coef(summary(pooled_pm2p5))) %>%
  dplyr::mutate(var = rownames(coef(summary(pooled_pm2p5))))

multilevel_pm2p5_coef.df <- 
  as.data.frame(coef(summary(multilevel_pm2p5))) %>%
  dplyr::mutate(var = rownames(coef(summary(multilevel_pm2p5))))

models_pm2p5_coef.df <- 
  rbind(pooled_pm2p5_coef.df %>% 
          dplyr::select(Estimate, `Std. Error`, var) %>%
          dplyr::mutate(model = "pooled"), 
        multilevel_pm2p5_coef.df %>% 
          dplyr::select(Estimate, `Std. Error`, var) %>%
          dplyr::mutate(model = "multilevel"))

models_pm2p5_coef.df$label <- 
  predictor_labels[match(models_pm2p5_coef.df$var, 
                         names(predictor_labels))]

models_pm2p5_coef.df$label <- 
  factor(models_pm2p5_coef.df$label, 
         levels = rev(predictor_labels),
         ordered = T)

pooled_pm10_coef.df <- 
  as.data.frame(coef(summary(pooled_pm10))) %>%
  dplyr::mutate(var = rownames(coef(summary(pooled_pm10))))

multilevel_pm10_coef.df <- 
  as.data.frame(coef(summary(multilevel_pm10))) %>%
  dplyr::mutate(var = rownames(coef(summary(multilevel_pm10))))

models_pm10_coef.df <- 
  rbind(pooled_pm10_coef.df %>% 
          dplyr::select(Estimate, `Std. Error`, var) %>%
          dplyr::mutate(model = "pooled"), 
        multilevel_pm10_coef.df %>% 
          dplyr::select(Estimate, `Std. Error`, var) %>%
          dplyr::mutate(model = "multilevel"))

models_pm10_coef.df$label <- 
  predictor_labels[match(models_pm10_coef.df$var, 
                         names(predictor_labels))]

models_pm10_coef.df$label <- 
  factor(models_pm10_coef.df$label, 
         levels = rev(predictor_labels),
         ordered = T)

ggsave(filename = "../fig/estimate_entiremodel.pdf", width = 10, height = 12,
       grid.arrange(
       models_pm2p5_coef.df %>%
         dplyr::filter(!is.na(label)) %>%
         ggplot(aes(colour = model, 
                    x=Estimate,
                    y=label)) +
         geom_point() +
         geom_errorbar(aes(xmin=Estimate-`Std. Error`, 
                           xmax=Estimate+`Std. Error`)) +
         theme_bw() +
         labs(y = NULL),
              models_pm10_coef.df %>%
         dplyr::filter(!is.na(label)) %>%
         ggplot(aes(colour = model, 
                    x=Estimate,
                    y=label)) +
         geom_point() +
         geom_errorbar(aes(xmin=Estimate-`Std. Error`, 
                           xmax=Estimate+`Std. Error`)) +
         theme_bw() +
         labs(y = NULL),
       ncol = 1))

ggsave(filename = "../fig/estimate_selectedvars.pdf", width = 10, height = 12,
       grid.arrange(
         models_pm2p5_coef.df %>%
           dplyr::filter(!is.na(label) &
                           !grepl("dist_sd_lag4_contact2death|neigh_weighted_mean|perc_pop_lockdown", var)) %>%
           ggplot(aes(colour = model, 
                      x=Estimate,
                      y=label)) +
           geom_point() +
           geom_errorbar(aes(xmin=Estimate-`Std. Error`, 
                             xmax=Estimate+`Std. Error`)) +
           theme_bw() +
           labs(y = NULL),
         models_pm10_coef.df %>%
           dplyr::filter(!is.na(label) &
                           !grepl("dist_sd_lag4_contact2death|neigh_weighted_mean|perc_pop_lockdown", var)) %>%
           ggplot(aes(colour = model, 
                      x=Estimate,
                      y=label)) +
           geom_point() +
           geom_errorbar(aes(xmin=Estimate-`Std. Error`, 
                             xmax=Estimate+`Std. Error`)) +
           theme_bw() +
           labs(y = NULL),
         ncol = 1
       ))
```

![](../fig/estimate_entiremodel.pdf)

```{r, fig.width = 10, fig.height = 40, eval = FALSE}
all_days <- unique(hex_dead_wide.df$day[!is.na(hex_dead_wide.df$T_20)])

funPlot <- function(day) {
  this_dat <- hex_dead_wide.df[hex_dead_wide.df$day == day,] 
  hex_ita_10km.sf$value <- this_dat$dist_sd[match(hex_ita_10km.sf$hex_id,
                                                  this_dat$hex_id)]
  hex_ita_10km.sf$value_cut <- cut(hex_ita_10km.sf$value, 
                                   breaks = c(-128,-50,-10,-5,0,5,10,50,128),
                                   include.lowest = T)
  return(ggplot() +
           geom_sf(data = hex_ita_10km.sf, aes(fill = value_cut), colour = NA) +
           scale_fill_brewer(palette = "YlOrRd", direction = 1,
                             drop = F) +
           geom_sf(data = region.sf, fill = NA, size = .1) +
           theme(axis.text = element_blank(),
                 axis.ticks = element_blank(),
                 legend.position = 'none') +
           labs(caption = paste0("Day: ", day),
                fill = "Diff."))
}

plot.list <- lapply(all_days, funPlot)
do.call("grid.arrange", c(plot.list, ncol=6))
```

```{r, eval = F}
hex_dead_wide.df$P1 <- 
  hex_ita_10km.df$P1[match(hex_dead_wide.df$hex_id, 
                           hex_ita_10km.df$hex_id)] 
base_geom_line <- 
  hex_dead_wide.df %>%
  filter(!is.na(P1)) %>%
  mutate(date = as.Date(as.Date(paste0("2020", day), format = "%Y%m%d"))) %>%
  group_by(date) %>%
  summarize(mean = weighted.mean(dist_sd, w = P1, na.rm = T),
            upp = quantile(dist_sd, prob = 0.975, na.rm = T),
            low = quantile(dist_sd, prob = 0.025, na.rm = T)) %>%
  ggplot(aes(x=date, y=mean)) +
  geom_line() + 
  geom_ribbon(aes(ymax = upp, ymin = low), alpha = .2) +
  theme_bw() + labs(y = NULL, x = "mean and 95% of the distribution")

```

### Top 20 coefficients

```{r top-coefficients, results = 'asis', include = T}

library(tidyr)

models_merged_coef.df <- 
  rbind(models_pm2p5_coef.df, models_pm10_coef.df) %>%
  dplyr::mutate(Estimate_low = Estimate - `Std. Error`,
                Estimate_upp = Estimate + `Std. Error`) %>%
  dplyr::filter(sign(Estimate_low)==sign(Estimate_upp)) %>%
  dplyr::group_by(var) %>%
  dplyr::filter((n()==4 | (grepl("pm[0-9]", var) & n()==2)) & !is.na(var) & !grepl("X", label)) %>%
  dplyr::group_by(label) %>%
  dplyr::summarise(Estimate = mean(Estimate), 
                   Estimate_low = Estimate_low[which.min(Estimate_low)],
                   Estimate_upp = Estimate_upp[which.min(Estimate_upp)])


models_merged_coef.df %>%
  dplyr::ungroup() %>%
  dplyr::arrange(desc(Estimate)) %>%
  dplyr::top_n(20, wt = Estimate) %>%
  dplyr::mutate(Estimate = round(Estimate, 4),
                Estimate_low = round(Estimate_low, 4),
                Estimate_upp = round(Estimate_upp, 4)) %>%
  kable(format = 'latex')

```

## Predictions for the Province of Bergamo

Predictions are based on the pooled model (with PM 2.5) and limited to the cells of the Province of Bergamo. 


```{r, fig.cap = "Comuni in the Province of Bergamo and major roads", include = T}

hex_id_bg <- 
  hex_ita_10km.sf$hex_id[hex_ita_10km.sf$PROVINCIA %in% "Bergamo"]

bg_bb <- 
  st_bbox(italy_comune.sf %>%
            dplyr::filter(COD_PROV == "16") %>%
            st_transform(4326))

road_bg <- 
  st_crop(ita_osm_roads.sf, bg_bb)


ggplot() + 
  geom_sf(data = 
            italy_comune.sf %>%
            dplyr::filter(COD_PROV == "16"),
          colour = 'white') + 
  geom_sf(data = road_bg,
          size = .15, alpha = .25, colour = "black") + 
  geom_sf(data = 
            hex_ita_10km.sf %>%
            dplyr::filter(hex_id %in% hex_id_bg),
          fill = NA) + 
  theme_bw()

```

Predictions details: 

* Predictions are produced for each cell of the Province of Bergamo and for each day between 1 February 2020 and 30 April 2020.
* Predictions are set with total lockdown measures implemented since 11 February 2020.
* The missing values for the case-to-test-ratio variable are imputed based on the first regional value available (reported for 24 February 2020, when testing started in Italy)
* To include the effect of *past* and *neigbouring* excess  mortality levels are produced in three steps: 
  1. Daily predictions for the `r length(hex_id_bg)` cells of the Province are run based on observed data, with  population under strict lockdown measures set to 100\%. 
  2. The resulting daily excess mortality predictions for each cell are used as neighbouring excess mortality predictors for a daily new prediction. 
  3. This last daily prediciton is used to update to 4-day lagged local excess mortality predictor which is the used in the predictions for the following day.


```{r}

dat_bg <- 
  model_vars_std.df %>%
  dplyr::filter(hex_id %in% hex_id_bg)

dat_bg$case_test_ratio <- as.numeric(dat_bg$case_test_ratio)

dat_bg <- 
  dat_bg %>%
  dplyr::group_by(hex_id) %>%
  dplyr::arrange(day) %>%
  tidyr::fill(case_test_ratio, 
              .direction = "up")

dat_bg$case_test_ratio <- 
  matrix(dat_bg$case_test_ratio, ncol = 1)

dat_bg_lockdown <- 
  dat_bg

dat_bg_lockdown$perc_pop_lockdown[dat_bg_lockdown$day == as.Date('2020-02-11')] <- 1
  
this_seq <- 
  seq(from = as.Date("2020-02-01"),
      to = as.Date("2020-04-30"),
      by = "day")

for (i in 5:length(this_seq)) {
  
  print(i)
  
  this_newdata <- 
    dat_bg_lockdown[dat_bg_lockdown$day == this_seq[i],]
  
  this_newdata$dist_sd_contact2death <- 
    predict(pooled_pm2p5, 
            newdata = this_newdata, 
            type = "response", na.rm = T)
  
  for(j in 1:nrow(this_newdata)) {
    
   print(j)
    
    this_newdata$neigh_weighted_mean[j,1] <-
      weighted.mean(this_newdata$dist_sd_contact2death[-j], 
                    w = this_newdata$P1[-j])
    
  }
  
  this_newdata$dist_sd_contact2death <- 
    predict(pooled_pm2p5, 
            newdata = this_newdata, 
            type = "response", na.rm = T)
  
  dat_bg_lockdown <- 
    dat_bg_lockdown[dat_bg_lockdown$day != this_seq[i],]
  
  dat_bg_lockdown <-
    rbind(dat_bg_lockdown, 
          this_newdata)
  
  dat_bg_lockdown <-
    dat_bg_lockdown %>%
    dplyr::group_by(hex_id) %>%
    dplyr::arrange(day) %>%
    dplyr::mutate(dist_sd_lag4_contact2death = lag(dist_sd_contact2death, 4))
  
}

dat_bg_lockdown <-
  merge(dat_bg_lockdown, 
        dat_bg[, c("hex_id", "day", "dist_sd_contact2death")],
        by = c("hex_id", "day"))

dat_bg_lockdown$COMUNE <- 
  hex_ita_10km.sf$COMUNE[match(dat_bg_lockdown$hex_id,
                               hex_ita_10km.sf$hex_id)]

ggsave(filename = "../fig/bg-prediction.pdf", width = 9, height = 9,
       dat_bg_lockdown %>% 
         dplyr::group_by(hex_id) %>%
         dplyr::mutate(first_lockdown_day = min(day[perc_pop_lockdown == 1])) %>%
         dplyr::ungroup() %>%
         dplyr::filter(day >= as.Date("2020-02-01"), 
                       day < as.Date("2020-05-01"),
                       P1 > quantile(P1, p = .5)) %>%
         ggplot(aes(x = day, y = dist_sd_contact2death.x)) +
         geom_bar(stat='identity') +
         geom_line(aes(y=dist_sd_contact2death.y), colour = 'red') +
         # geom_vline(aes(xintercept = first_lockdown_day)) +
         labs(x = NULL, y = "excess mortality") +
         theme_bw() +
         facet_wrap(COMUNE~., scales = "free_y"))

```

![](../fig/bg-prediction.pdf)
